tags:

views:

186

answers:

1

I'm trying to count the number of words in a file with strtok().

/*
 * code.c
 *
 * WHAT
 *      Use strtok() to count the number of words in a file.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define STRMAX 128

/*
 * Open a hard-coded text file, read it line by line and try to count its
 * words by splitting each line with strtok().
 * Prints "Number of words: N" and returns 0.
 */
int main() {
    /* Declarations */
    FILE* fptr;             /* input file handle */
    int iCntr = 0;          /* running word count */
    char sLine[STRMAX];     /* buffer for one line (or part of a long line) */
    char* cPToken;          /* token most recently returned by strtok() */

    /* Read file */
    /* Error handler: note that the program exits with status 0 even though the open failed */
    if ((fptr = fopen("/home/ubuntu/Dropbox/Unief/C/H18/Opdr01/Debug/test.txt", "r")) == NULL) {
        printf("Couldn't read test.txt.\n");
        exit(0);
    } else {
        while (fgets(sLine, STRMAX-1, fptr) != NULL) {                  /* Read line; fgets stores at most STRMAX-2 characters here */
            /* Every call passes sLine itself (never NULL) as the first argument */
            while ((cPToken = strtok(sLine, ".,; !?\r\n")) != NULL) {   /* Split into words */
                iCntr++;
            }
        }
        printf("Number of words: %d\n", iCntr);
    }

    /* Always clean up your mess */
    fclose(fptr);
    return 0;
}

This causes an infinite loop. Why?

+3  A: 

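You need two kinds of calls: the first call passes the string to tokenize, and every later call must pass NULL so that strtok continues where it left off. If you pass sLine every time, strtok starts over and returns the first token again, so the loop never ends.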

Instead of:

/* sLine is passed on every call, so strtok() restarts and returns the line's first token each time */
while ((cPToken = strtok(sLine, ".,; !?\r\n")) != NULL) {  /* Split into words */
                iCntr++;
}

do

/* First call: hand strtok() the line to split */
cPToken = strtok(sLine, ".,; !?\r\n");
while (cPToken != NULL) {   /* Split into words */
     iCntr++; /* we have a valid word */
     /* Later calls: NULL tells strtok() to continue in the same line */
     cPToken = strtok(NULL, ".,; !?\r\n");          
}

Edit: Full source:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * wcount
 *
 * Count the delimiter-separated tokens ("words") in a text file.
 *
 *   fname  path of the file to read
 *   delim  NUL-terminated set of characters that separate tokens
 *
 * Returns the number of tokens found, or 0 if fname or delim is NULL or
 * the file cannot be opened.
 *
 * The file is scanned one character at a time rather than through a
 * fixed-size line buffer, so a token on a very long line is never cut in
 * two at a buffer boundary and counted twice.
 */
size_t wcount(const char *fname, const char *delim) {
    size_t nw = 0;
    int in_word = 0;    /* non-zero while the scan is inside a token */
    int c;
    FILE *fp;

    if (fname == NULL || delim == NULL)
        return 0;

    fp = fopen(fname, "r");
    if (fp == NULL)
        return 0;

    while ((c = getc(fp)) != EOF) {
        /* strchr() also matches the terminating NUL of delim, so a stray
         * NUL byte in the file acts as a separator. */
        if (strchr(delim, c) != NULL) {
            in_word = 0;
            continue;
        }
        if (!in_word) {
            in_word = 1;
            nw++;
        }
        /* A newline always ends the current token, matching line-by-line
         * tokenizing when '\n' is not listed in delim. */
        if (c == '\n')
            in_word = 0;
    }

    fclose(fp);
    return nw;
}

/*
 * Print the number of words in C:\sample.txt, using the usual punctuation
 * and whitespace characters as separators. Always returns 0.
 */
int main(int argc, char* argv[])
{
    /* The path is hard-coded; command-line arguments are not used. */
    (void)argc;
    (void)argv;

    /* wcount() returns size_t, whose matching conversion is %zu.
     * Printing it with %u is undefined where size_t is not unsigned int. */
    printf("%zu\n", wcount("C:\\sample.txt", ".,; !?\r\n"));
    return 0;
}

With your input file, I get the result as 16.

Edit# 2: Modifying your source:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define STRMAX 128

/*
 * Count the words in c:\test.txt by splitting every line with strtok(),
 * then print "Number of words: N".
 *
 * Returns 0 on success. If the file cannot be opened, a message is written
 * to stderr and the program exits with EXIT_FAILURE.
 */
int main() {
    /* Declarations */
    FILE* fptr;
    int iCntr = 0;
    char sLine[STRMAX];
    char* cPToken;

    /* Open the file; a failed open is an error, so report it on stderr
     * and exit with a failing status instead of 0. */
    if ((fptr = fopen("c:\\test.txt", "r")) == NULL) {
        fprintf(stderr, "Couldn't read test.txt.\n");
        exit(EXIT_FAILURE);
    }

    /* fgets() already leaves room for the terminating NUL, so the whole
     * buffer size can be passed. Lines longer than the buffer arrive in
     * several pieces, and a word cut by such a split is counted once per
     * piece. */
    while (fgets(sLine, sizeof sLine, fptr) != NULL) {      /* Read line */
        /* First call names the line; later calls pass NULL to continue in it */
        cPToken = strtok(sLine, ".,; !?\r\n");
        while (cPToken != NULL) {                           /* Split into words */
            iCntr++;
            cPToken = strtok(NULL, ".,; !?\r\n");
        }
    }
    printf("Number of words: %d\n", iCntr);

    /* Always clean up your mess */
    fclose(fptr);
    return 0;
}

I get the same result -- 16.

dirkgently
That doesn't accurately represent the number of words in my text file, as far as I can tell.
Pieter
@Pieter: You need to call `strtok` a second time. That is how it works. And the first parameter must be `NULL` for the second call (which is usually wrapped in a loop). You may need to rearrange the counter increment. The code I posted was intended to show how to call `strtok` only.
dirkgently
@Pieter "As far as I can tell" is rather vague feedback. Try a text file with a known word count and use it to verify whether or not the solution works.
kb
Text file:`Here are four words. Here are four more! Now we have a total of sixteen words.`This returns `Number of words: 1`.
Pieter
Whoops, turns out I had messed up some of my code. Sorry about that.
Pieter