If you want something a bit more flexible - for example, being able to choose which characters make up a word - you could have a look at this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// maximum number of characters in a single word; adjust size for your needs
#define MAX_WORD_LEN 1000
// every character listed here counts as part of a word; any other byte is a delimiter
// (letters A-Z and a-z, digits 0-9 and '-')
static const char *parseable_characters_str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-";
// lookup index table, one entry per possible byte value, stores yes/no -> allowed/not allowed
static char parseable_characters_tbl[256] = {0};
/*
 * Fills a 256-entry membership table from a list of allowed characters.
 *
 * table: array of at least 256 bytes; it is cleared first, then table[c]
 *        is set to 1 for every character c that occurs in str
 * str:   NUL-terminated list of the characters that may be part of a word
 */
void build_lookup_index(char *table, const char *str)
{
    // walk the list as raw byte values so each one is a valid 0..255 slot
    const unsigned char *p = (const unsigned char *)str;

    // start from "nothing is allowed"
    memset(table, 0, 256);

    // flag the slot that corresponds to each listed character
    while (*p != '\0') {
        table[*p] = 1;
        p++;
    }
}
/*
 * Scans buf[0..size) and prints every complete word to stdout.
 *
 * A word is a run of bytes whose lookup_table entry is non-zero; any other
 * byte (and always the NUL byte) is a delimiter. Scanning stops after the
 * first NUL byte. Words longer than MAX_WORD_LEN are reported on stderr
 * instead of being printed.
 *
 * buf:          data to scan; its contents may be shifted by this call
 * size:         number of valid bytes in buf
 * lookup_table: 256-entry table as filled by build_lookup_index()
 *
 * returns the number of unparsed bytes that follow the last delimiter seen;
 * those bytes are moved to the start of buf so the caller can append the
 * next chunk behind them. Returns 0 when nothing is left over, and also
 * when no delimiter was seen at all (nothing is moved in that case).
 */
int parse_buffer(char *buf, int size, const char *lookup_table)
{
    char word[MAX_WORD_LEN+1]; // +1 leaves room for the string-terminator
    int i;
    int l = 0; // length of the word currently being collected
    int s = 0; // offset of the first byte after the last delimiter

    for (i = 0; i < size; i++) {
        // index the table with the byte value: plain char may be signed, and
        // a negative index would read in front of the table
        unsigned char c = (unsigned char)buf[i];

        if (c != '\0' && lookup_table[c]) {
            // word character; keep counting past the limit but stop storing
            // to prevent buffer overflows
            if (l < MAX_WORD_LEN)
                word[l] = buf[i];
            l++;
            continue;
        }

        // character not in lookup table -> delimiter
        if (l > MAX_WORD_LEN) {
            // only words longer than MAX_WORD_LEN were truncated above
            fprintf(stderr,"word exceeds bounds\n");
        }
        else if (l > 0) { // if word has at least 1 character...
            // append string-terminator
            word[l] = '\0';
            printf("word found (%d): '%s'\n",l,word);
        }
        // reset word length
        l = 0;
        // save last word offset
        s = i+1;

        // a NUL byte ends the scan; the bytes after it count as unparsed
        if (c == '\0')
            break;
    }

    if (s > 0 && size-s > 0) {
        // move rest of the buffer to the start for next iteration step
        memmove(buf,buf+s,size-s);
        return size-s;
    }
    return 0;
}
/*
 * Reads "out.txt" chunk by chunk and prints every word found in it.
 *
 * Bytes that parse_buffer() reports as unparsed (the start of a word that
 * was cut off by the chunk boundary) stay at the front of the buffer and
 * the next chunk is read in behind them.
 *
 * returns EXIT_SUCCESS, or EXIT_FAILURE if the file cannot be opened or read
 */
int main(int argc, char *argv[])
{
    FILE *fh;
    char buf[1000]; // read buffer
    size_t off = 0; // bytes carried over from the previous chunk
    int rc = EXIT_SUCCESS;

    (void)argc;
    (void)argv;

    // initialize word index
    build_lookup_index(parseable_characters_tbl,parseable_characters_str);

    // "rb" because of Windows - we want all characters to be read
    fh = fopen("out.txt","rb");
    if (!fh) {
        perror("out.txt");
        return EXIT_FAILURE;
    }

    for (;;) {
        // keep one byte spare so a final delimiter can be appended at the end
        // of the file; off is at most sizeof(buf)-2 here because parse_buffer
        // always returns less than the size it was given, so want is >= 1
        size_t want = sizeof(buf) - 1 - off;
        size_t got = fread(buf+off,1,want,fh);
        size_t total = off + got;

        if (got < want) {
            // short read: either a read error or the end of the file
            if (ferror(fh)) {
                fprintf(stderr,"error while reading out.txt\n");
                rc = EXIT_FAILURE;
                break;
            }
            // end of file: append a delimiter so a last word that is not
            // followed by one is still reported, then parse until nothing
            // is left (parse_buffer stops early at NUL bytes in the data)
            buf[total++] = '\0';
            while (total > 0)
                total = (size_t)parse_buffer(buf,(int)total,parseable_characters_tbl);
            break;
        }

        // parse the carried-over bytes together with the new chunk
        off = (size_t)parse_buffer(buf,(int)total,parseable_characters_tbl);
    }

    fclose(fh);
    return rc;
}