What I am trying to do is to generate some random numbers (not necessarily single digit) like
29106
7438
5646
4487
9374
28671
92
13941
25226
10076
and then count how many times each digit (0 to 9) occurs in them:
count[0] = 3 Percentage = 6.82
count[1] = 5 Percentage = 11.36
count[2] = 6 Percentage = 13.64
count[3] = 3 Percentage = 6.82
count[4] = 6 Percentage = 13.64
count[5] = 2 Percentage = 4.55
count[6] = 7 Percentage = 15.91
count[7] = 5 Percentage = 11.36
count[8] = 3 Percentage = 6.82
count[9] = 4 Percentage = 9.09
This is the code I am using:
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
/*
 * Writes 1,000,000 rand() values to "random.txt", one per line, then reads
 * the file back one decimal digit at a time and prints, for each digit 0-9,
 * how many times it occurred and its percentage of all digits read.
 *
 * Returns EXIT_FAILURE if the file cannot be opened or fully written,
 * 0 otherwise.
 */
int main(void) {
    srand((unsigned)time(NULL));

    FILE *fp = fopen("random.txt", "w");
    if (fp == NULL) {
        perror("random.txt");
        return EXIT_FAILURE;
    }
    for (int i = 0; i < 1000000; i++) {
        fprintf(fp, "%d\n", rand());
    }
    /* fclose flushes buffered output, so a write error can surface here. */
    if (fclose(fp) != 0) {
        perror("random.txt");
        return EXIT_FAILURE;
    }

    fp = fopen("random.txt", "r");
    if (fp == NULL) {
        perror("random.txt");
        return EXIT_FAILURE;
    }

    long count[10] = {0};
    int digit;
    /*
     * Loop on fscanf's return value, not on feof(): feof() only becomes true
     * after a read has already failed, so testing it before reading would
     * run the body once more with the stale last digit and count it twice.
     * "%1d" skips whitespace (the newlines) and converts a single digit.
     */
    while (fscanf(fp, "%1d", &digit) == 1) {
        count[digit]++;
    }
    fclose(fp);

    long sum = 0;
    for (int i = 0; i < 10; i++) {
        sum += count[i];
    }

    for (int i = 0; i < 10; i++) {
        /* Guard the division in case no digit was read at all. */
        double percent = (sum > 0) ? (100.0 * count[i]) / sum : 0.0;
        printf("count[%d] = %7ld Percentage = %5.2f\n", i, count[i], percent);
    }
    return 0;
}
If I generate a large number of random numbers (1000000), this is the result I get:
count[0] = 387432 Percentage = 8.31
count[1] = 728339 Percentage = 15.63
count[2] = 720880 Percentage = 15.47
count[3] = 475982 Percentage = 10.21
count[4] = 392678 Percentage = 8.43
count[5] = 392683 Percentage = 8.43
count[6] = 392456 Percentage = 8.42
count[7] = 391599 Percentage = 8.40
count[8] = 388795 Percentage = 8.34
count[9] = 389501 Percentage = 8.36
Notice that 1, 2 and 3 have too many hits. I have tried running this several times and each time I get very similar results.
I am trying to understand what could cause 1, 2 and 3 to appear much more frequently than any other digit.
Taking a hint from what Matt Joiner and Pascal Cuoq pointed out,
I changed the code to use
for(i = 0; i < 1000000; i++)
fprintf(fp, "%04d\n", rand() % 10000);
// "%04d" zero-pads, so small values are written with leading zeros (e.g. 0042)
// rand() % 10000 yields numbers in the range 0000 to 9999
and this is what I get (similar results on multiple runs):
count[0] = 422947 Percentage = 10.57
count[1] = 423222 Percentage = 10.58
count[2] = 414699 Percentage = 10.37
count[3] = 391604 Percentage = 9.79
count[4] = 392640 Percentage = 9.82
count[5] = 392928 Percentage = 9.82
count[6] = 392737 Percentage = 9.82
count[7] = 392634 Percentage = 9.82
count[8] = 388238 Percentage = 9.71
count[9] = 388352 Percentage = 9.71
What can be the reason that 0, 1 and 2 are favored?
Thanks everyone. Using
/*
 * Returns a pseudo-random integer uniformly distributed over 0..29999,
 * drawn from rand() by rejection sampling.
 *
 * The range holds exactly 30000 values, a multiple of 10000, so
 * rand2() % 10000 hits every residue equally often. Accepting 30000 itself
 * would give residue 0 one extra source value.
 */
int rand2(void) {
    enum { RANGE = 30000 };

    /*
     * Accept only draws below the largest multiple of RANGE that rand() can
     * produce, then fold them into 0..RANGE-1. When RAND_MAX is 32767 the
     * limit is 30000 and the fold is a no-op; when RAND_MAX is larger, most
     * draws are accepted instead of nearly all being rejected.
     * The arithmetic is unsigned so RAND_MAX + 1 cannot overflow.
     */
    const unsigned buckets = ((unsigned)RAND_MAX + 1u) / RANGE;
    const unsigned limit = buckets * RANGE;

    unsigned num;
    /* Iterate rather than recurse: a run of rejections must not grow the stack. */
    do {
        num = (unsigned)rand();
    } while (num >= limit);

    return (int)(num % RANGE);
}
// rand2() % 10000 lies in 0..9999; "%04d" writes it zero-padded to four digits
fprintf(fp, "%04d\n", rand2() % 10000);
I get
count[0] = 399629 Percentage = 9.99
count[1] = 399897 Percentage = 10.00
count[2] = 400162 Percentage = 10.00
count[3] = 400412 Percentage = 10.01
count[4] = 399863 Percentage = 10.00
count[5] = 400756 Percentage = 10.02
count[6] = 399980 Percentage = 10.00
count[7] = 400055 Percentage = 10.00
count[8] = 399143 Percentage = 9.98
count[9] = 400104 Percentage = 10.00