For various reasons I won't discuss here, I am writing a simple tokenizer in C. Below is an example I hacked together that grows the token buffer in fixed increments as needed while reading characters from the input stream. The buffer eventually reaches the size of the largest token, which can obviously accommodate any smaller token. Is this an acceptable approach? If not, what is the best way to determine how much memory to allocate for each token?
#include <stdio.h>
#include <stdlib.h>  /* calloc, realloc, free */
#include <ctype.h>

#define BUFF_CHUNK_SIZE 4

int main(void) {
    int c, i, n;
    char *buffer;

    i = 0;
    n = BUFF_CHUNK_SIZE;
    buffer = calloc(n, sizeof(char));
    if (buffer == NULL) {
        return 1;
    }

    while ((c = getchar()) != EOF) {
        if (isalnum(c)) {
            buffer[i] = (char)c;
            i++;
            /* Buffer is full: grow it by another fixed chunk. */
            if (i == n) {
                char *tmp;
                n += BUFF_CHUNK_SIZE;
                tmp = realloc(buffer, n * sizeof(char));
                if (tmp == NULL) {
                    free(buffer);
                    return 1;
                }
                buffer = tmp;
            }
        } else {
            if (i == 0) {
                continue;
            }
            /* realloc does not zero the new bytes, so terminate the string
               explicitly instead of relying on calloc/memset. */
            buffer[i] = '\0';
            printf("%s\n", buffer);
            i = 0;
        }
    }

    /* Flush a final token that is not followed by a delimiter. */
    if (i > 0) {
        buffer[i] = '\0';
        printf("%s\n", buffer);
    }

    free(buffer);
    return 0;
}
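
For comparison, here is a minimal sketch of one alternative (not necessarily the best way, which is what I'm asking about): grow the buffer geometrically by doubling its capacity instead of adding a fixed chunk, so the number of realloc calls stays logarithmic in the token length. The grow_buffer helper is just an illustrative name I made up, not anything standard.

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

/* Illustrative helper: double the capacity whenever the buffer fills up. */
static char *grow_buffer(char *buf, size_t *cap) {
    size_t new_cap = *cap * 2;
    char *tmp = realloc(buf, new_cap);
    if (tmp == NULL) {
        free(buf);
        return NULL;
    }
    *cap = new_cap;
    return tmp;
}

int main(void) {
    size_t cap = 4, len = 0;
    char *buffer = malloc(cap);
    int c;

    if (buffer == NULL)
        return 1;

    while ((c = getchar()) != EOF) {
        if (isalnum(c)) {
            buffer[len++] = (char)c;
            /* Keep at least one free byte for the terminator. */
            if (len == cap && (buffer = grow_buffer(buffer, &cap)) == NULL)
                return 1;
        } else if (len > 0) {
            buffer[len] = '\0';
            printf("%s\n", buffer);
            len = 0;
        }
    }
    if (len > 0) {
        buffer[len] = '\0';
        printf("%s\n", buffer);
    }
    free(buffer);
    return 0;
}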