What is the fastest way to count lines and words in a text file in pure ANSI C?
A word is terminated by a space or a period. A line is terminated by '\n'.
What is the fastest way to count lines and words in a text file in pure ANSI C?
A word is terminated by a space or a period. A line is terminated by '\n'.
EOF
Here is an explicit answer that counts the number of lines (extending it to count words is straightforward, à la the C++ version linked to in the question). This version reads the file through a fixed-size buffer. Another answer suggests reading the entire file into memory first, which is simpler, but the code below is closer to what your C++ example does.
#include <stdio.h>
#include <string.h>
#define BUFSIZE 1024   /* size of the read buffer; tune for performance */

/* Count the lines in the file named by argv[1] and print the result.
   The file is read with fgets in chunks of at most BUFSIZE-1 characters.
   A chunk that fills the buffer without ending in '\n' is only part of a
   longer line and is not counted on its own; every other chunk completes
   a line.  If the file ends right after such a partial chunk, that
   unterminated last line is still counted once.
   Returns 0 on success, 1 if the argument count is wrong or the file
   cannot be opened. */
int main(int argc, char** argv)
{
    int newlines = 0;
    int partial = 0;   /* nonzero when the last chunk was only part of a line */
    size_t len;
    char buf[BUFSIZE];
    FILE* file;

    if (argc != 2)
        return 1;

    file = fopen(argv[1], "r");
    if (file == NULL)
    {
        /* Without this check fgets would be handed a null stream. */
        perror(argv[1]);
        return 1;
    }

    while (fgets(buf, BUFSIZE, file))
    {
        len = strlen(buf);
        partial = (len == BUFSIZE - 1 && buf[BUFSIZE - 2] != '\n');
        if (!partial)
            newlines++;
    }

    /* The input ended in the middle of a line that exactly filled the
       buffer: count that line too, as shorter unterminated last lines
       already are. */
    if (partial)
        newlines++;

    fclose(file);

    printf("Number of lines in %s: %d\n", argv[1], newlines);
    return 0;
}
The BUFSIZE macro can be tweaked to maximize performance (since you say you want the fastest way); 1024 is simply a guess. Another possibility is to memory-map the file, but I didn't try that since mmap is not ANSI C.
Maybe take a look at the source code of the GNU wc utility as this utility does exactly what you want.
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
/* Every counter in this program has this type. */
typedef unsigned long count_t;

/* Counters for the file currently being processed, in the order
   characters, words, lines. */
count_t ccount, wcount, lcount;

/* Sums of the per-file counters, in the same order; they start at zero. */
count_t total_ccount = 0, total_wcount = 0, total_lcount = 0;
/* Print a printf-style error message on stderr and exit with status 1.
   FMT and AP are passed to vfprintf unchanged.  If PERR is not 0 the
   message is followed by perror's description of the errno value that
   was current when this function was entered; otherwise it is followed
   by a newline.  This function does not return. */
static void
error_print (int perr, char *fmt, va_list ap)
{
  /* vfprintf may itself modify errno, so remember the caller's value
     and restore it just before perror reads it. */
  int saved_errno = errno;

  vfprintf (stderr, fmt, ap);
  if (perr)
    {
      errno = saved_errno;
      perror (" ");
    }
  else
    fprintf (stderr, "\n");
  exit (1);
}
/* Report a formatted error on stderr and terminate the program.
   FMT is a printf-style format string consumed together with the
   variadic arguments.  error_print is called with PERR set to 0, so no
   errno description is appended. */
static void
errf (char *fmt, ...)
{
  va_list args;

  va_start (args, fmt);
  error_print (0, fmt, args);
  va_end (args);
}
/* Report a formatted error on stderr, followed by the errno status,
   and terminate the program.  FMT is a printf-style format string
   consumed together with the variadic arguments.  error_print is
   called with PERR set to 1. */
static void
perrf (char *fmt, ...)
{
  va_list args;

  va_start (args, fmt);
  error_print (1, fmt, args);
  va_end (args);
}
/* Write one result line for FILE to standard output: the line, word
   and character counts, in that order and each at least six columns
   wide, followed by the name itself. */
void
report (char *file, count_t ccount, count_t wcount, count_t lcount)
{
  fprintf (stdout, "%6lu %6lu %6lu %s\n", lcount, wcount, ccount, file);
}
/* Tell whether C may appear inside a word.  The result is whatever
   isalpha reports for C: nonzero for a word constituent, 0 otherwise. */
static int
isword (unsigned char c)
{
  int alphabetic = isalpha (c);

  return alphabetic;
}
/* Account for one input character C: always increment the character
   counter ccount and, when C is a newline, the line counter lcount too.
   The expansion is wrapped in do { ... } while (0) so that `COUNT (c);'
   is a single statement and stays correct as the body of an unbraced
   if/else.  C is evaluated exactly once. */
#define COUNT(c) \
  do \
    { \
      ccount++; \
      if ((c) == '\n') \
        lcount++; \
    } \
  while (0)
/* Read the next word from the stream FP, updating the counters as it
   goes.  Every character consumed goes through COUNT, which increments
   ccount and, for newlines, lcount; wcount is incremented once when the
   first word constituent is found.  Reading stops after the first
   non-word character that follows the word.
   Returns 0 on end of file or error condition, 1 otherwise. */
int
getword (FILE *fp)
{
  int c;

  if (feof (fp))
    return 0;

  /* Skip, but still count, everything before the start of a word. */
  while ((c = getc (fp)) != EOF)
    {
      if (isword (c))
        {
          /* The word's first character is not counted here; the loop
             below counts it. */
          wcount++;
          break;
        }
      COUNT (c);
    }

  /* Count the word's characters and the single character that ends it. */
  for (; c != EOF; c = getc (fp))
    {
      COUNT (c);
      if (!isword (c))
        break;
    }

  return c != EOF;
}
/* Count the characters, words and lines of the file named FILE, print
   its report line and add its counts to the running totals.  If the
   file cannot be opened, perrf is called with the failing name. */
void
counter (char *file)
{
  FILE *stream;

  stream = fopen (file, "r");
  if (stream == NULL)
    perrf ("cannot open file `%s'", file);

  /* Start this file's counters from zero. */
  lcount = 0;
  wcount = 0;
  ccount = 0;

  /* getword updates the counters itself; keep calling it until it
     returns 0. */
  while (getword (stream) != 0)
    continue;
  fclose (stream);

  report (file, ccount, wcount, lcount);

  total_lcount += lcount;
  total_wcount += wcount;
  total_ccount += ccount;
}
/* Entry point: run counter on every file named on the command line.
   With no file arguments a usage message is reported through errf.
   When more than one file was given, a final "total" line is printed
   from the accumulated totals.  Returns 0. */
int
main (int argc, char **argv)
{
  int arg;

  if (argc < 2)
    errf ("usage: wc FILE [FILE...]");

  arg = 1;
  while (arg < argc)
    {
      counter (argv[arg]);
      arg++;
    }

  if (argc > 2)
    report ("total", total_ccount, total_wcount, total_lcount);

  return 0;
}
Found at: http://www.gnu.org/software/cflow/manual/html_node/Source-of-wc-command.html