Hi,
I need help please. I started to program a common brute forcer / password guesser with CUDA (2.3 / 3.0beta). I tried different ways to generate all possible plain text "candidates" of a defined ASCII char set.
In this sample code I want to generate all 74^4 possible combinations (and just output the result back to host/stdout).
$ ./combinations
Total number of combinations    : 29986576
Maximum output length   : 4
ASCII charset length    : 74
ASCII charset   : 0x30 - 0x7a
 "0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxy"
CUDA code (compiled with 2.3 and 3.0b - sm_10) - combinaions.cu:
#include <stdio.h>
#include <cuda.h>
__device__ uchar4 charset_global = {0x30, 0x30, 0x30, 0x30};
__shared__ __device__ uchar4 charset[128];
__global__ void combo_kernel(uchar4 * result_d, unsigned int N)
{
 int totalThreads = blockDim.x * gridDim.x ;
 int tasksPerThread = (N % totalThreads) == 0 ? N / totalThreads : N/totalThreads + 1;
 int myThreadIdx = blockIdx.x * blockDim.x + threadIdx.x ;
 int endIdx = myThreadIdx + totalThreads * tasksPerThread ;
 if( endIdx > N) endIdx = N;
 const unsigned int m = 74 + 0x30;
 for(int idx = myThreadIdx ; idx < endIdx ; idx += totalThreads) {
  charset[threadIdx.x].x = charset_global.x;
  charset[threadIdx.x].y = charset_global.y;
  charset[threadIdx.x].z = charset_global.z;
  charset[threadIdx.x].w = charset_global.w;
  __threadfence();
  if(charset[threadIdx.x].x < m) {
   charset[threadIdx.x].x++;
  } else if(charset[threadIdx.x].y < m) {
   charset[threadIdx.x].x = 0x30; // = 0
   charset[threadIdx.x].y++;
  } else if(charset[threadIdx.x].z < m) {
   charset[threadIdx.x].y = 0x30; // = 0
   charset[threadIdx.x].z++;
  } else if(charset[threadIdx.x].w < m) {
   charset[threadIdx.x].z = 0x30;
   charset[threadIdx.x].w++;; // = 0
  }
  charset_global.x = charset[threadIdx.x].x;
  charset_global.y = charset[threadIdx.x].y;
  charset_global.z = charset[threadIdx.x].z;
  charset_global.w = charset[threadIdx.x].w;
  result_d[idx].x = charset_global.x;
  result_d[idx].y = charset_global.y;
  result_d[idx].z = charset_global.z;
  result_d[idx].w = charset_global.w;
 }
}
#define BLOCKS 65535
#define THREADS 128
int main(int argc, char **argv)
{
 const int ascii_chars = 74;
 const int max_len = 4;
 const unsigned int N = pow((float)ascii_chars, max_len);
 size_t size = N * sizeof(uchar4);
 uchar4 *result_d, *result_h;
 result_h = (uchar4 *)malloc(size );
 cudaMalloc((void **)&result_d, size );
 cudaMemset(result_d, 0, size);
 printf("Total number of combinations\t: %d\n\n", N); 
 printf("Maximum output length\t: %d\n", max_len);
 printf("ASCII charset length\t: %d\n\n", ascii_chars);
 printf("ASCII charset\t: 0x30 - 0x%02x\n ", 0x30 + ascii_chars);
 for(int i=0; i < ascii_chars; i++)
  printf("%c",i + 0x30);
 printf("\n\n");
 combo_kernel <<< BLOCKS, THREADS >>> (result_d, N);
 cudaThreadSynchronize();
 printf("CUDA kernel done\n");
 printf("hit key to continue...\n");
 getchar();
 cudaMemcpy(result_h, result_d, size, cudaMemcpyDeviceToHost);
 for (unsigned int i=0; i<N; i++)
  printf("result[%06u]\t%c%c%c%c\n",i, result_h[i].x, result_h[i].y, result_h[i].z, result_h[i].w);
 free(result_h);
 cudaFree(result_d);
}
The code should compile without any problems but the output is not what i expected.
On emulation mode:
CUDA kernel done hit
key to continue...
    result[000000]  1000 
...
    result[000128]  5000
On release mode:
CUDA kernel done hit
key to continue...
    result[000000]  1000 
...
    result[012288]  5000
I also used __threadfence() and or __syncthreads() on different lines of the code also without success...
ps. if possible I want to generate everything inside of the kernel function . I also tried "pre" generating of possible plain text candidates inside host main function and memcpy to device, this works only with a very limited charset size (because of limited device memory).
any idea about the output, why the repeating (even with __threadfence() or __syncthreads()) ?
any other method to generate plain text (candidates) inside CUDA kernel fast :-) (~75^8) ?
thanks a million
greets jan