views:

183

answers:

3

I am working on a program that sorts its input lines alphabetically or numerically, depending on the arguments passed to main. This is the follow-up exercise:

Add a field-handling capability, so sorting may be done on fields within lines, each field sorted according to an independent set of options. (The index for this book was sorted with -df for the index category and -n for the page numbers.)

I'm a bit puzzled about what they mean by fields.

What exactly does the field_of function do? Does it advance the original pointer by n fields, with a field being a string of non-blank characters?

Also, if num_fields fields are entered, then the compare function will return upon reaching the first non-zero comparison, is that right? If the result was zero (equal strings) then it won't return anything, because the strings don't need to be replaced. Otherwise, it returns a number.

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    #define MAX_FIELDS 10   /* capacity of fieldarray[] and flagarray[] */

    /* per-field option bits, stored in flagarray[] and tested by compare_field() */
    #define FLAG_DIRECTORY (0x01 << 0)
    #define FLAG_FOLD  (0x01 << 1)
    #define FLAG_NUMERIC (0x01 << 2)
    #define FLAG_REVERSE (0x01 << 3)

    int fieldarray[MAX_FIELDS];  /* field numbers handed to field_of(), in sort-key order */
    unsigned char flagarray[MAX_FIELDS]; /* FLAG_* bits for the matching fieldarray[] entry */
    int num_fields = 0;   /* number of entries in use in the two arrays above */

    #define MAXLINES 5000  /* max #lines to be sorted */
    char *lineptr[MAXLINES]; /* pointers to text lines */

    int readlines(char *lineptr[], int nlines);
    void writelines(char *lineptr[], int nlines);

    void qsort(void *lineptr[], int left, int right,
               int (*comp)(void *, void *));

    static char *field_of(char *s, int n);
    static int isdir(int c);  /* return type spelled out: implicit int was removed in C99 */
    int compare(char *, char *);
    int stringcmp(char *, char *);
    int stringcmpi(char *, char *);
    int dircmp(char *, char *);
    int numcmp(char *, char *);
int main()

     if ((nlines = readlines(lineptr, MAXLINES)) >= 0) {
      qsort((void **) lineptr, 0, nlines-1, (int (*)(void *, void *)) compare);
      writelines(lineptr, nlines);
      return 0;
     } else {
      printf("input too big to sort\n");
      return 1;
     }
    }

    #define MAXLEN 1000 /* max length of any input line */
    /* getline and alloc are only declared here; their definitions are not
       shown in this listing.
       NOTE(review): POSIX <stdio.h> declares a getline with a different
       signature, which may clash when POSIX extensions are enabled. */
    int getline(char *, int);
    char *alloc(int);



    /* qsort:  arrange v[left]...v[right] in ascending order as judged by comp,
     which returns a negative value when its first argument sorts first */
    void qsort(void *v[], int left, int right,
               int (*comp)(void *, void *))
    {
     void swap(void *v[], int, int);
     int scan, boundary;

     if (left < right) {  /* fewer than two elements: nothing to do */
      /* park the middle element at v[left] to act as the pivot */
      swap(v, left, (left + right)/2);
      boundary = left;
      scan = left + 1;
      while (scan <= right) {
       if (comp(v[scan], v[left]) < 0) {
        boundary++;
        swap(v, boundary, scan);
       }
       scan++;
      }
      /* drop the pivot between the two partitions, then sort each side */
      swap(v, left, boundary);
      qsort(v, left, boundary - 1, comp);
      qsort(v, boundary + 1, right, comp);
     }
    }

    /* swap:  exchange the pointers stored in v[i] and v[j] */
    void swap(void *v[], int i, int j)
    {
     void *held = v[i];

     v[i] = v[j];
     v[j] = held;
    }

    /* field_of:  return a pointer to the start of the n-th (1-based)
     whitespace-delimited field of s.  Leading whitespace is skipped first, and
     n <= 1 yields the first field.  Returns NULL when the end of the string is
     hit while stepping over an earlier field, i.e. the line has fewer than n
     fields; if only whitespace follows field n-1 the result points at the
     terminating '\0'. */
    static char *field_of(char *s, int n)
    {
     /* <ctype.h> functions need values in unsigned char range */
     while (isspace((unsigned char) *s))
      s++;
     while (--n > 0) {
      /* step over the current field ... */
      while (!isspace((unsigned char) *s)) {
       if (*s == '\0')
        return NULL;
       s++;
      }
      /* ... and over the blanks separating it from the next one; without
         this the next pass would stop at once and never reach field n */
      while (isspace((unsigned char) *s))
       s++;
     }

     return s;
    }

    /* isdir:  return 1 if c takes part in "directory order" comparisons
     (a letter, a digit or a whitespace character), 0 otherwise */
    static int isdir(int c)
    {
     /* callers pass plain char values, which may be negative; the <ctype.h>
        functions require a value in unsigned char range */
     c = (unsigned char) c;
     return isalpha(c) || isdigit(c) || isspace(c);
    }

    /* compare_field:  compare the whitespace-delimited fields that begin at s1
     and s2 under the options in flags:
       FLAG_NUMERIC    compare by numeric value (numcmp); takes precedence
       FLAG_DIRECTORY  skip characters for which isdir() is false
       FLAG_FOLD       ignore case
       FLAG_REVERSE    negate the result
     A NULL pointer (what field_of() returns for a missing field) is treated
     as an empty field.  Returns <0, 0 or >0 like strcmp. */
    int compare_field(char *s1, char *s2, unsigned int flags)
    {
     int d;

     if (s1 == NULL)
      s1 = "";
     if (s2 == NULL)
      s2 = "";

     if (flags & FLAG_NUMERIC) {
      d = numcmp(s1, s2);
     } else {
      int c1, c2;

      for (;;) {
       if (flags & FLAG_DIRECTORY) {
        /* isdir() accepts whitespace, so this stops at the field end */
        while (*s1 != '\0' && !isdir((unsigned char) *s1))
         s1++;
        while (*s2 != '\0' && !isdir((unsigned char) *s2))
         s2++;
       }
       /* compare as unsigned char, like strcmp, and keep the values
          valid for the <ctype.h> functions */
       c1 = (unsigned char) *s1;
       c2 = (unsigned char) *s2;
       /* a blank ends a field just as '\0' does, so "ab" and "ab cd"
          have equal first fields */
       if (isspace(c1))
        c1 = '\0';
       if (isspace(c2))
        c2 = '\0';
       if (flags & FLAG_FOLD) {
        c1 = toupper(c1);
        c2 = toupper(c2);
       }
       d = c1 - c2;
       if (d != 0 || c1 == '\0')
        break;
       s1++;
       s2++;
      }
     }

     return (flags & FLAG_REVERSE) ? -d : d;
    }

    /* compare:  order the lines s1 and s2.  Each of the num_fields sort keys is
     tried in turn: field fieldarray[i] of both lines is compared under the
     options in flagarray[i], and the first non-zero result is returned.  If
     every key ties (or none is configured) the whole lines are compared
     according to the external variables numeric, reverse and directory
     (fold is consulted by stringcmp and dircmp). */
    int compare(char *s1, char *s2)
    {
     int i, d;
     char *f1, *f2;

     for (i = 0; i < num_fields; i++) {
      f1 = field_of(s1, fieldarray[i]);
      f2 = field_of(s2, fieldarray[i]);
      /* field_of() returns NULL for a line that lacks the field; compare
         it as an empty field rather than dereferencing NULL */
      if (f1 == NULL)
       f1 = "";
      if (f2 == NULL)
       f2 = "";
      d = compare_field(f1, f2, flagarray[i]);
      if (d != 0)
       return d;
     }

     /* whole-line comparison: the only ordering when no fields are set,
        otherwise the tie-breaker */
     if (numeric)
      d = numcmp(s1, s2);
     else if (directory)
      d = dircmp(s1, s2);
     else
      d = stringcmp(s1, s2);

     if (reverse)
      return -d;
     else
      return d;
    }

    /* stringcmp:  whole-string comparison of s1 and s2; case is ignored when
     the external variable fold is set */
    int stringcmp(char *s1, char *s2)
    {
     return fold ? stringcmpi(s1, s2) : strcmp(s1, s2);
    }

    /* stringcmpi:  compare s1 and s2 case-insensitively.  Returns 0 when the
     strings match apart from case, otherwise the difference between the first
     pair of upper-cased characters that differ (negative if s1 sorts first). */
    int stringcmpi(char *s1, char *s2)
    {
     int c1, c2;

     for (;;) {
      /* go through unsigned char: toupper() is undefined for negative
         values other than EOF, and this orders high-bit characters the
         same way strcmp does */
      c1 = toupper((unsigned char) *s1++);
      c2 = toupper((unsigned char) *s2++);
      if (c1 != c2)
       return c1 - c2;
      if (c1 == '\0')
       return 0;
     }
    }

    /* dircmp:  compare s1 and s2 in "directory order": characters rejected by
     isdir() are skipped on both sides, and case is ignored when the external
     variable fold is set */
    int dircmp(char *s1, char *s2)
    {
     int diff;

     for (;;) {
      /* advance each side to its next significant character */
      while (*s1 != '\0' && !isdir(*s1))
       s1++;
      while (*s2 != '\0' && !isdir(*s2))
       s2++;

      diff = fold ? toupper(*s1) - toupper(*s2) : *s1 - *s2;
      if (diff != 0)
       break;
      /* equal so far: stop once either string is exhausted */
      if (*s1 == '\0' || *s2 == '\0')
       break;
      s1++;
      s2++;
     }

     return diff;
    }

    /* numcmp:  compare s1 and s2 by the numeric value atof() reads from each;
     returns -1, 0 or 1 */
    int numcmp(char *s1, char *s2)
    {
     extern double atof(const char *);
     double lhs = atof(s1);
     double rhs = atof(s2);

     /* each relational test is 0 or 1, so the difference is -1, 0 or 1 */
     return (lhs > rhs) - (lhs < rhs);
    }
+3  A: 

The idea of a field is a sub-part of the line. It sounds like you are asking about delimited fields, so a line like:

a b 1 2 3

Field 1 is 'a', field 2 is 'b', etc.

Let's say all your lines are like the example above. You may want to numerically sort the lines based on field 3.

While I used an example of space delimited fields, there are other ways to define fields as well. Another common way to define fields is using absolute positions (maybe field 1 goes from offset 0 to 9 while field 2 goes from offset 10 to 14).

When you are sorting by field, you will need to isolate the appropriate part of each line and then compare those isolated parts.

Finally, when you have multiple fields, you can do multiple comparisons. First compare the primary field but if they are equal, fall back on the second fields. In pseudo-code:

# returns -1 if line1 < line2, 0 if line1 == line2, 1 if line1 > line2
int sort(line1, line2):
    foreach fielddefinition:
        line1_field = extract_field(line1)
        line2_field = extract_field(line2)

        if line1_field < line2_field:
            return -1
        elif line2_field < line1_field:
            return 1

        # this field is equal, continue checking with the next field

    # Since all fields are equal, the lines are equal
    return 0
R Samuel Klatchko
So for example, in the line "abab assd asd", field 1 is abab, field 2 is assd, and field 3 is asd? And all the blanks in between them are ignored?
Tool
@Tool - sure, that is a completely acceptable way to define fields. There are variations, but that should be good enough to get you going.
R Samuel Klatchko
+1  A: 

It appears so far you've been handling each line as a single entity. The Field concept will allow your sorting program to work at the level of sub-parts (fields) within the line.

The requirement is that each field will be able to receive its own sorting directive: eg. handle the field #2 as a string, the field #4 as an integer value etc.

The ordered list of the fields upon which the sort should be based will also likely be specified (with configuration or command line options), for example allowing to sort a list of names and phone numbers by say the area code (first) and by name (second) resulting in a sorted list like

Bob Parker      201 365 7733
Charles Louis   201 123 4444
Alan Black      366 001 9876

etc.

Another implication is that the program will receive some instruction as to how the fields are to be located (i.e. with a separator, fixed length etc.). Such info could be however implicit, i.e. the assignment (I'm assuming this is homework), may specify that the program only deals with files where the delimiter is say the tab character.

BTW: what does the title of the question ("C Pointers to functions") have to do with this question?

mjv
Tool
A: 

Your field_of function takes a char * (string) pointer, skips past any spaces it finds, skips past one word (field) containing n spaces, then returns the pointer to that location in the string (the end of the field, oddly). If it hits the end of the string it returns NULL.

ezod