views:

114

answers:

4

Hi

I am trying to make a very easy converter/compressor; the program should take a file with 4 different types of ASCII characters and writ it out as binary to a file. The program should also read the binary file and convert it to ASCII and print it out on the screen. Under is my code, I can’t really get the char/cstring. What types of improvement must I do to get this to work?

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

char compresser(char c);
char converter(char c);

int main(int argc, char **argv)
{
    char *c;
    FILE *If = fopen("A.txt", "r");
    FILE *Uf = fopen("B.txt", "rw");

    if(If == NULL || Uf == NULL) {
            printf("Could not open file");
    }

    if(argc < 4) {
        printf("Too few argument, must be 3\n");

    } else if(strcmp(argv[1], "p") == 0) {
        while((c = fgetc(If)) != EOF) {
            printf("%c", c);
        }

    } else if(strcmp(argv[1], "e") == 0) {
        while((c = fgetc(If)) != EOF) {
            fprintf(Uf, "%c\n", compresser(c));
        }

    } else if(strcmp(argv[1], "d") == 0) {
        while((c = fgetc(Uf)) != EOF) {
            printf("%c", converter(c));
        }

    } else {
        printf("Not a valid command\n");
    }
}

char compresser(char c)
{
        if(c == ' ') {
            return '00';
        } else if(c == ':') {
            return '01';
        } else if(c == '@') {
            return '10';
        } else if(c == '\n') {
            return '11';
        } else {
            return 'e';
        }
}

char converter(char c)
{
        if(c == '00') {
            return ' ';
        } else if(c == '01') {
            return ':';
        } else if(c == '10') {
            return '@';
        } else if(c == '11') {
        return '\n';
    } else {
        return 'e';
    }

}

A: 

This code isn't correct:

char compresser(char c)
{
        if(c == ' ') {
            return '00';
        } else if(c == ':') {
            return '01';
        } else if(c == '@') {
            return '10';
        } else if(c == '\n') {
            return '11';
        } else {
            return 'e';
        }
}

'00' is invalid in C, because a character literal must consist of a single character. '\x00' means the character with the value 0. Use \x00 for a binary 0x00, not 00.

Example code for clarification:

#include <stdio.h>

int main()
{
    char c = '\x61';

    printf("%c\n", c);

    return 0;
}

See how I defined c.


That said, however, I don't see how your approach intends to compress the characters.

Eli Bendersky
What I want to do is to read a character and get the binary value. But then i need to know the binary value of' ' - (blank) = ?: = ?@ = ?\n = ?Can you please give me the binary value for this?
@user265767: I think you're confused. The character's binary value *is* stored in the file. Indeed computers can't store much else than binary values for now. Do some background reading on characters and their representation in computers, and read on ASCII. For "binary values of..." characters, google "ascii table"
Eli Bendersky
A: 

Well there's a number of issues that you might want to address.

The first thing I noticed is that you test for error conditions, print an error message, and then continue processing as if everything is OK.

The second is that your compression algorihm isn't a compression algorithm. As it stands, you seem to want each uncompressed character to become two in compressed form. Even if you instead want to write a decimal value as suggested by Eli, you're not reducing the space required to store the character.

My guess is that what you really want is binary, not decimal representation. That would alllow you to represent each character using two bits instead of a full byte.

For example, given four characters 'A', 'B', 'C' and 'D' one possible binary representation would be

CHAR    BITS
A   <=>   00
B   <=>   01
C   <=>   10
D   <=>   11

Then you can choose how to combine these values into compressed form for example the four character sequence ABAD could be represented as either 00010011 or 11000100. Just make sure that the you separate them in the way the corresponds to the way you combine them.

Just to make it clear 00010011 is equivalent to the decimal 19.

Have a look here for more details on how to manipulate bits.

torak
Yes, you guessed right! But I don't how I can do that
Thank you. This is something I need to get done very fast, can someone give me a start?
@user265767: Use a lookup table or array. Or you could do this arithmetically: `compression_code = letter - 'A';`. Next, pack the bits using bit shift and binary-OR.
Thomas Matthews
@user265767: See my posted answer.
Thomas Matthews
A: 

Here is an example code fragment:

unsigned int Compress(char letter_1, char letter_2, char letter3, char letter 4)
{
  unsigned int value = 0;
  unsigned int result = 0;
  value = letter1 - 'A';
  result = result << 2; // Shift the old to make room for new bits.
  result |= value;      // Put in new bits.
  value = letter2 - 'A';
  result = result << 2; // Shift the old to make room for new bits.
  result |= value;      // Put in new bits.
  value = letter3 - 'A';
  result = result << 2; // Shift the old to make room for new bits.
  result |= value;      // Put in new bits.
  value = letter4 - 'A';
  result = result << 2; // Shift the old to make room for new bits.
  result |= value;      // Put in new bits.
  return result;
}

This is an example of compressing the letter (letter - 'A') and packing into an unsigned int (result = result << 2; result |= value;).

There may be more efficient or compact methods, but this is for demonstration purposes only.

Thomas Matthews
A: 

Hi

Thanks for many great tips :D Her is a try, the compress part or command option e creates a empty compressed file, can anyone see why?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

long readFile(FILE *f, unsigned char **buffer);
void compress(FILE *f, long fileSize, unsigned char *buffer);

int main(int argc, char *argv[]) {
    if(argc != 4) {
        printf("Too few arguments, must be 4\n");
        exit(0);
    }

    FILE *in = fopen(argv[2], "rb");
    FILE *ou = fopen(argv[3], "wb");

    unsigned char *buffer = NULL;

    if(in == NULL || ou == NULL) {
        printf("Could not open file\n");
        exit(8);
    }

    long fileSize = readFile(in, &buffer);

    if(strcmp(argv[1], "p") == 0) {
        printf("%s\n", buffer);
    } else if(strcmp(argv[1], "e") == 0) {

        int i;

        for(i = 0; i < fileSize; i++) {
            switch(buffer[i]) {
            case ' ':
                buffer[i] = 0x0;
                break;
            case ':':
                buffer[i] = 0x1;
                break;
            case '@':
                buffer[i] = 0x2;
                break;
            case '\n':
                buffer[i] = 0x3;
                break;
            }
        }
        compress(ou, fileSize, buffer);
        fclose(ou);

    } else if(strcmp(argv[1], "d") == 0) {

    } else {
        printf("Not a valid command\n");
    }
    free(buffer);
    return 1;
}

/*
 * Reades the file and return the size of the fil
 */
long readFile(FILE *f, unsigned char **buffer)
{
    fseek(f, 0, SEEK_END);
    long fileSize = ftell(f);
    fseek(f, 0, SEEK_SET);

    (*buffer) = (unsigned char*) malloc(sizeof(char) * fileSize);

    fread((*buffer), sizeof(char), fileSize, f);
    fclose(f);

    return fileSize;
}

void compress(FILE *f, long fileSize, unsigned char *buffer)
{
    int i; 
    char insert = 0; 
    int step;

    for(i = 0; i < fileSize; i++) {
        step = i % 4 + 1;

        switch(step) {
        case 1: 
            insert |= buffer[i] << 6;
            break;
        case 2:
            insert |= buffer[i] << 4;
            break;
        case 3:
            insert |= buffer[i] << 2;
            break;
        case 4:
        insert |= buffer[i];
        putc(insert, f);
        insert = 0;
        break;
    }
}
putc('\0', f);

}