Me gustaría compartir con todos vosotros mis primeros pasos en CUDA.
Usé PyCUDA y algo de código en C. Se trata de un crackeador de MD5. No es muy rápido, pero puede servir como ejemplo ilustrativo para los que deis vuestros primeros pasos con CUDA y PyCUDA.
Es muy mejorable, pero ahí está. La implementación de MD5 del subprograma en C es más lenta que la de referencia de RSA (RFC 1321), pero más compacta. El programa es muy sencillo; consultad la documentación de CUDA y la de PyCUDA para más información.
Para cualquier consulta, no dudéis en comentar.

El código, a continuación: araMD5.py <-Descargar archivo python

 

from pycuda import gpuarray
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import numpy, re, sys, time

from pycuda.autoinit import context


mod = SourceModule("""
/*
 * md5.c
 *
 *  Created on: 22/11/2014
 *      Author: javier garcia glez.
 *
 *      Implementacion del cifrado MD5
 *
 *      Version 0.1
 *
 */
#include <stdio.h>
#include <stdlib.h>

typedef unsigned char byte;
typedef unsigned short int nibble;
typedef unsigned int word;
typedef unsigned long dword;

/*
 * Per-step additive constants of the MD5 round function: the md5 kernel
 * adds K[i] to the working value at step i (0..63).
 * The values correspond to the table T[1..64] given in RFC 1321.
 */
__device__ const word K[] = {
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 
0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, 
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, 
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 
0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, 
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 
0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, 
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 
0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, 
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 
0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, 
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 };

/*
 * Per-step left-rotation amounts: at step i (0..63) the md5 kernel rotates
 * the 32-bit intermediate value left by s[i] bits. The table holds four
 * groups of 16 entries, one group per round, each repeating a 4-value pattern.
 */
__device__ const byte s[] = { 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 5, 9,
        14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 4, 11, 16, 23, 4, 11,
        16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 6, 10, 15, 21, 6, 10, 15, 21, 6,
        10, 15, 21, 6, 10, 15, 21 };



/*
 * Sixteen 32-bit words, i.e. 64 bytes: the size of one MD5 input block.
 * NOTE(review): no use of wchunk is visible in this file -- confirm before removing.
 */
typedef struct dchunks {
    word data[16];
}  wchunk;

/*
 * Overlays one 32-bit word (wdata) with its four individual bytes (bdata),
 * so the same storage can be accessed either way.
 */
typedef union slength {
    word wdata;
    byte bdata[4];
} ulength;

/*
 * Reverse, in place, the byte order of each 32-bit word in bf[0 .. count-1],
 * where count is l, or 1 when l is 0 (at least one word is always processed).
 */
__device__ void byteReverse(word *bf, word l) {
    word count = (l == 0) ? 1 : l;

    for (word n = 0; n < count; n++) {
        // Read the word as individual bytes, then rebuild it with the
        // byte at the lowest address placed in the most significant position.
        const byte *src = (const byte *) (bf + n);
        word swapped = ((word) src[0] << 24)
                     | ((word) src[1] << 16)
                     | ((word) src[2] << 8)
                     |  (word) src[3];
        bf[n] = swapped;
    }
}


/*
 * md5 -- compute the MD5 digest of one candidate password per thread.
 *
 * Launch layout: 1-D grid of 1-D blocks with exactly one thread per
 * candidate. The kernel receives no element count, so it cannot
 * bounds-check: launching more threads than candidates reads and writes
 * out of range. No shared memory is used and no stream is assumed.
 *
 * object : candidates packed back to back, each occupying pwlen + 1 bytes
 *          (pwlen characters followed by one terminator byte).
 * pa     : output, four words per candidate. Each word is byte-swapped
 *          before being stored, so printing the four words in order as
 *          8-digit hexadecimal yields the usual digest string.
 * pwlen  : length in bytes of every candidate in this launch. If it is
 *          negative or the padded message does not fit in the per-thread
 *          scratch buffer, the kernel returns without writing to pa.
 */
__global__ void md5(byte *object, word *pa, int pwlen) {

    // Capacity of the per-thread scratch buffer that holds the padded message.
    enum { BUF_BYTES = 2048, BUF_WORDS = BUF_BYTES / 4 };

    // Flat global index; equals threadIdx.x for a single-block launch.
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // The buffer is declared as words so that the 32-bit loads in the
    // compression loop are properly aligned; pnobj is its byte view.
    word wbuf[BUF_WORDS];
    byte *pnobj = (byte *) wbuf;
    word vA, vB, vC, vD, tmppad, mod, g, a, b, c, d, chunks, chunk, f, len, i;
    word temp, temp2, *pw;

    // pwlen is the same for every thread, so this early exit is uniform.
    if (pwlen < 0)
        return;

    vA = 0x67452301;
    vB = 0xefcdab89;
    vC = 0x98badcfe;
    vD = 0x10325476;

    // calculate padding
    // tmppad is the distance from the end of the message to the 64-bit
    // length field, which must start at an offset congruent to 56 mod 64.
    // When fewer than 9 bytes remain in the last block (mod >= 56) the field
    // moves to the following block. Using >= here matters: with mod == 56 a
    // distance of 0 would make the length overwrite the 0x80 marker.
    len = (word) pwlen;
    mod = len % 64;
    tmppad = (mod >= 56) ? 120 - mod : 56 - mod;

    if (len + tmppad + 8 > (word) BUF_BYTES)
        return;

    chunks = (len + tmppad + 8) / 64;

    // working mem string copy
    // Only the bytes that will actually be hashed need to be cleared.
    memset(pnobj, 0, chunks * 64);
    memcpy(pnobj, object + (size_t) idx * (len + 1), len);

    // Mandatory 0x80 marker right after the message, then the message length
    // in bits as a little-endian 64-bit value. Only the low word is written;
    // the high word stays zero from the memset (len is far below 2^29).
    pnobj[len] = 0x80;
    wbuf[(len + tmppad) / 4] = len << 3;

    for (chunk = 0; chunk < chunks; chunk++) {
        pw = wbuf + chunk * 16;

        a = vA;
        b = vB;
        c = vC;
        d = vD;

        for (i = 0; i < 64; i++) {
            // Select the round function f and the message word index g
            // for the round that step i belongs to.
            if (i < 16) {
                f = (b & c) | ((~b) & d);
                g = i;
            } else if (i < 32) {
                f = (d & b) | ((~d) & c);
                g = (5 * i + 1) % 16;
            } else if (i < 48) {
                f = b ^ c ^ d;
                g = (3 * i + 5) % 16;
            } else {
                f = c ^ (b | (~d));
                g = (7 * i) % 16;
            }

            temp = d;
            d = c;
            c = b;
            temp2 = a + f + K[i] + pw[g];
            // Rotate left by s[i] bits (s[i] is never 0 or 32).
            b += (temp2 << s[i]) | (temp2 >> (32 - s[i]));
            a = temp;
        }

        vA += a;
        vB += b;
        vC += c;
        vD += d;
    }

    // Swap bytes so the host can format each word directly as hexadecimal.
    byteReverse(&vA, 1);
    byteReverse(&vB, 1);
    byteReverse(&vC, 1);
    byteReverse(&vD, 1);

    // Threads never exchange data, so no barrier is needed before the store.
    pw = pa + (size_t) idx * 4;
    pw[0] = vA;
    pw[1] = vB;
    pw[2] = vC;
    pw[3] = vD;
}

""", ''nvcc'', [], keep=True, no_extern_c=False, arch=None, code=None, cache_dir=None)

# Queue of candidate passwords waiting to be hashed. Each entry is a list of
# characters; the generator functions below append entries that end in a NUL.
PWLIST = []
# Running total of candidates sent to the GPU (the main loop adds the batch size).
npasswords = 0

#HASH="900150983cd24fb0d6963f7d28e17f72"        
#HASH="0cc175b9c0f1b6a831c399e269772661"
#HASH="e1671797c52e15f763380b45e841ec32"
#HASH="0bf375b600c339bedc9b6f104b64ea66"
# Target digest as 32 hexadecimal characters; checkHash matches it against the
# strings produced by getHash.
HASH="5d41402abc4b2a76b9719d911017c592"
#HASH = "32e6eb142601d344c188b94a8fe5f2cd"
# Maximum number of candidates per kernel launch; the main loop uses the batch
# size as the number of threads in the block.
MT = 512
# NOTE(review): not referenced in the visible code; presumably the number of
# candidates generated per refill of PWLIST -- confirm against the original script.
FILLPASSLIMIT = MT * 4
# Candidates generated during the current refill: reset to 1 by
# getPasswordsRow3 and incremented by incData.
npass=0

def getHash(a):
    """Return the first four integers of `a` as one hexadecimal string.

    Each value is rendered as at least eight zero-padded lowercase hex digits
    and the four pieces are concatenated in order. Indexing `a[0]`..`a[3]`
    raises IndexError when fewer than four values are supplied.
    """
    pieces = ["%08x" % a[position] for position in range(4)]
    return "".join(pieces)

def checkHash(a, passw):
    """Look for the target digest among a batch of results and exit on a hit.

    a     -- flat sequence of integers, four consecutive values per candidate,
             in the same order as `passw`.
    passw -- sequence of candidates; each one is a sequence of one-character
             strings that is joined to display the password.

    HASH is applied as a regular expression anchored at the start of each
    digest string built by getHash. On the first match the password and the
    throughput statistics are printed and the process terminates through
    sys.exit(0); otherwise the function returns None.
    """
    rhash = re.compile(HASH)
    # Floor division keeps the count an integer on Python 2 and Python 3 alike.
    for i in range(len(a) // 4):
        if not rhash.match(getHash(a[i * 4:i * 4 + 4])):
            continue

        print("found pass: %s" % "".join(passw[i]))
        # tstart was taken with time.clock(), so the same clock is used here.
        elapsed = time.clock() - tstart
        # Report the elapsed time as measured (it used to be divided by 100,
        # which disagreed with the rate printed next to it) and guard the rate
        # against a zero interval.
        if elapsed > 0:
            rate = npasswords / elapsed
        else:
            rate = 0
        print("%d passwords checked in %03.2f, %03d c/s" % (npasswords, elapsed, rate))
        sys.exit(0)

def getChars(file):
    """Read the character set to try from `file` and return it as a list.

    The file is read in binary mode. A single trailing line terminator
    (newline, optionally preceded by a carriage return) is removed; every
    other byte becomes one element of the result, in file order. Previously
    the last byte was dropped unconditionally, which lost a real character
    when the file did not end with a newline.
    """
    # The context manager closes the file even if the read fails.
    with open(file, "rb") as handle:
        raw = handle.read()

    if raw.endswith(b"\r\n"):
        raw = raw[:-2]
    elif raw.endswith(b"\n"):
        raw = raw[:-1]
    return list(raw)


def getIndex(data):
    """Return the position, within the module-level `chars` list, of the last
    element of `data`. Raises ValueError when that element is not in `chars`.
    """
    last_item = data[-1]
    position = chars.index(last_item)
    return position

def incData(data, PILA):
    """Advance the last character of `data` and queue the resulting candidate.

    data -- list of characters; its last element is the one being advanced.
    PILA -- list of characters forming the prefix of the candidate.

    The candidate appended to PWLIST is PILA, followed by `data` in reverse
    order, followed by a NUL terminator. `npass` is incremented on every call.

    Returns:
        the new index into `chars` when the character could be advanced;
        0  when it was already the last character and PILA is not empty
           (the character is reset to chars[0] unless PILA also ends in the
           last character);
        -1 when it was already the last character and PILA is empty.
    """
    global PWLIST, npass

    index = getIndex(data[-1]) + 1
    npass += 1
    wrapped = index > (len(chars) - 1)

    if not wrapped:
        data[-1] = chars[index]

    # On wrap-around the candidate is queued before any reset, exactly as in
    # the three separate append calls this replaces.
    PWLIST.append(list("".join(PILA) + "".join(reversed(data)) + '\0'))

    if not wrapped:
        return index
    if len(PILA) == 0:
        return -1
    if PILA[-1] != chars[-1]:
        data[-1] = chars[0]
    return 0
    
def getPasswordsRow3(PILA):
    """Queue the candidate held in PILA and then generate further candidates.

    Resets the module-level counter `npass` to 1, appends PILA (joined, plus a
    terminator) to PWLIST, and returns the tuple (len(PILA), PILA).

    NOTE(review): this listing is damaged. The `while` line below has lost the
    text between a '<' and a '>' (most likely the loop limit and the `if` that
    pairs with the first `else:`), so the loop cannot be reconstructed with
    confidence from here -- restore it from the original araMD5.py.
    NOTE(review): the doubled quotes and backslash in the terminator literal
    look like an escaping artifact of a single-quoted NUL character -- confirm.
    """
    global PWLIST, npass

    data=[]
    npass=1
    
    PWLIST.append(list("".join(PILA)+''\\0''))
    # NOTE(review): truncated line, see the docstring.
    while npass0:
            # Move the last character of PILA onto `data` and advance it.
            data.append(PILA.pop())
            r=incData(data, PILA)
        else:
            # Move everything collected in `data` back onto PILA.
            while len(data)>0:
                PILA.append(data.pop())
            else:
                # incData returns -1 when it wrapped around with PILA empty.
                if r==-1:
                    return len(PILA), PILA
    else:
        return len(PILA), PILA
    
def fillList2(chars, PILA):
    """Pick the starting candidate and delegate to getPasswordsRow3.

    chars -- sequence of characters to try; only its first and last elements
             are used here.
    PILA  -- current candidate as a list of characters.

    When PILA consists solely of the last character (an empty PILA counts as
    well), it is replaced by a candidate one character longer made entirely of
    the first character. Returns whatever getPasswordsRow3 returns.
    """
    exhausted = list(chars[-1] * len(PILA))
    if PILA == exhausted:
        longer = len(PILA) + 1
        PILA = list(chars[0] * longer)

    return getPasswordsRow3(PILA)
 
# ---------------------------------------------------------------------------
# Host driver: generate candidates, hash them on the GPU in batches of at
# most MT, and stop (inside checkHash) as soon as the target digest appears.
# ---------------------------------------------------------------------------
# NOTE(review): pycuda.autoinit is imported above and presumably has already
# initialised the driver; this explicit call is kept as in the original.
cuda.init()
chars = getChars("chars.txt")
length = 0
current_pass = 0
func = mod.get_function("md5")
tstart = time.clock()
nfillpasswords = 0
passwz = list()

while True:

    # Refill the queue once every pending candidate has been processed.
    if len(PWLIST) == 0:
        nfillpasswords = 0
        length, passwz = fillList2(chars, passwz)

    # Batch size, which is also the number of threads in the launched block.
    M = min(len(PWLIST), MT)
    if M == 0:
        # Nothing was generated; leave instead of dividing by zero below.
        break

    # Only whole batches are processed; a remainder stays queued.
    nbatches = len(PWLIST) // M

    for i in range(nbatches):
        batch = PWLIST[i * M:i * M + M]

        # One row per candidate and one byte per character, terminator included.
        bpassw = numpy.array(batch, 'S1')

        pbpassw = cuda.mem_alloc(bpassw.nbytes)
        cuda.memcpy_htod(pbpassw, bpassw)

        # Four 32-bit digest words per candidate, zero-initialised.
        a = gpuarray.zeros((M * 4), numpy.uint32)

        # Progress line rewritten in place: carriage returns, no newline.
        sys.stdout.write("\rchecking %s to %s %d threads\r" % ("".join(batch[0])[:-1], "".join(batch[-1])[:-1], M))
        sys.stdout.flush()

        func(pbpassw, a, numpy.int32(len(passwz)), block=(M, 1, 1))
        context.synchronize()
        adata = a.get()

        # The kernel has finished and the results are on the host, so the
        # input buffer can be released now rather than at garbage collection.
        pbpassw.free()

        npasswords += M
        checkHash(adata.tolist(), bpassw.tolist())

    # Drop every candidate that was hashed in this pass.
    del PWLIST[:nbatches * M]


context.pop()
Currently unrated
  • Share

Comments

cellsk8 3 years, 4 months ago

Muy interesante, pero ¿cómo instalo pycuda y cuda? ¡Gracias!

Link | Reply
Currently unrated

javier 3 years, 4 months ago

hey, cellsk8,
Este desarrollo lo programé en Linux. Para instalar CUDA puedes seguir uno de los muchos tutoriales que hay por la red, ya sea en Windows o en Linux.
En el caso de pycuda, puedes usar pip o easy_install.
$> pip install pycuda
Si necesitas más ayuda, intentaré ayudarte.
Saludos.

Link | Reply
Currently unrated

New Comment

* Please fill in all required form fields, thanks!