views:

42

answers:

1

Hi, In continuation of this question I ask the following question.

I've managed to read and understand what D.Shawley has recommended and I already finished a lot of work. But a problem occured to me again. I've managed to get the Content-Length of the page out of the HTTP response, but now I'm experiencing some problems with getting the length of the HTTP response header. As I understand from the RFC 2616 document, a response header can be preceded with several CRLFs and must be followed by one or more CRLFs. Now I'm heaving trouble with determining the end of the response header.

I've came up with the following files:

IEFDowner.h

    /*
     *  IEFDowner.h
     *  Downer
     *
     *  Created by ief2 on 2/08/10.
     *  Copyright 2010 ief2. All rights reserved.
     *
     *  http://developerief2.site11.com
     *  
     */

    #include <stdio.h> /* snprintf() */
    #include <sys/socket.h> /* SOCKET */
    #include <netdb.h> /* struct addrinfo */
    #include <stdlib.h> /* exit() */
    #include <string.h> /* memset(), strlen(), strcpy(), strstr() */
    #include <errno.h> /* errno */
    #include <unistd.h> /* close() */

    enum _IEFDownerErrorType {
        IEFNoError,
        IEFGAIError, /* Use gai_strerror() to get the error message */
        IEFConnectError, /* Failed to connect with the socket */
        IEFSendError, /* Use strerror() to get the error message */
        IEFReceiveError /* Error with recv() */
    };
    typedef enum _IEFDownerErrorType IEFDownerErrorType;

    /*
     Requests the data from the host at the given path thru the given protocol.

     PAREMTERS
     - host
        The host to connect to.

     - filepath
        The path to the file which to download from the host.

     - buffer
        A buffer to fill with the received data

     - maxSize
        The maximum size of the buffer

     - receivedBytes
        The amount of received bytes

     - errorType
        The type of error received. See the header file for more info (enum _IEFDownerErrorType)

     RETURN VALUE
        The function returns 0 if it succeeded or another code if it failed.
     */

    int
    IEFDownerHTTPDownload(const char *host, 
                          const char *filepath, 
                          void *buffer, 
                          unsigned long maxSize, 
                          long *receivedBytes, 
                          IEFDownerErrorType *errorType);

    /*
     Returns a pointer to a structure containing the IP4 or IP6 addresss.

     PARAMETERS
     - sa
        A pointer to a structure of sockaddr.

     RETURN VALUE
        Returns a pointer to a structure of the type sockaddr_in or the type sockaddr_in6.
     */
    void *
    IEFDownerGetInAddr(struct sockaddr *sa);

    /*
     Gets the content-length information out of an HTTP response header.

     PARAMETERS
     - httpHeader
        The null terminated response.

     - contentLength
        Upon return it contains the content length.

     RETURN VALUE
        The function returns 0 if it succeeded or -1 if it did not.
     */
    int
    IEFDownerGetContentLengthOfPage(const char *httpHeader, 
                                    int *contentLength);

    /*
     Gets the string lenth of the header information

     PARAMETERS
     - received
        The received header

     - headerSize
        Upon return contains the header length

     RETURN VALUE
        The function returns 0 if it succeeded. If there was no header information found, it returns -1.

     DISCUSSION
        The header size includes the trailing CRLF's behind the header data. 
        All empty CRLF's are included until a non-empty line is met. 
        If there're zero empty lines after the first non empty line, the header information was not found.
     */
    int
    IEFDownerGetSizeOfHeader(const char *received, 
                             int *headerSize);

IEFDowner.c

/*
*  IEFDowner.c
*  Downer
*
*  Created by ief2 on 2/08/10.
*  Copyright 2010 ief2. All rights reserved.
*
*  http://developerief2.site11.com
*  
*/



#include "IEFDowner.h"

int
IEFDownerHTTPDownload(const char *host, const char *filepath, void *buffer, unsigned long maxSize, long *rB, IEFDownerErrorType *errorType) {
    int status;                 // contains returned statuses
    int sockfd;                 // the socket file descriptor
    struct addrinfo *infos;     // linked list
    struct addrinfo hints;      // hints to getaddrinfo()
    struct addrinfo *p;         // used in the loop to get valid addrinfo
    long receivedBytes;         // the received bytes
    unsigned requestLength;     // the length of the request
    char *request;              // the http request

    // GET ADDRESS INFO
    // fill hints
    memset(&hints, 0, sizeof(hints));
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = AI_PASSIVE;
    hints.ai_family = AF_UNSPEC;

    // getaddrinfo
    status = getaddrinfo(host, 
                         "80", 
                         &hints, 
                         &infos);
    if(status != 0) {
        if(errorType) *errorType = IEFGAIError;
        return status;
    }

    // FIND FIRST VALID SOCKET
    for(p = infos; p != NULL; p = p->ai_next) {
        // create
        sockfd = socket(p->ai_family,
                        p->ai_socktype,
                        p->ai_protocol);
        if(sockfd == -1)
            continue;

        // try to connect
        status = connect(sockfd, 
                         p->ai_addr, 
                         p->ai_addrlen);
        if(status == -1) {
            close(sockfd);
            continue;
        }

        break;
    }

    // no valid socket found
    if(p == NULL) {
        if(errorType) *errorType = IEFConnectError;
        return status;
    }

    // GET RID OF INFOS
    freeaddrinfo(infos);

    // SEND HTTP REQUEST
    // calculate request length and make
    requestLength = strlen("GET ") + strlen(filepath) + strlen(" HTTP\1.0\n") + strlen("Host: ") + strlen(host) + strlen(":80\n\n") + 1;
    request = malloc(sizeof(char) * requestLength);
    snprintf(request, requestLength, "GET %s HTTP\1.0\nHost: %s:80\n\n", filepath, host);

    // send
    status = send(sockfd,
                  request,
                  requestLength - 1,
                  0);
    if(status == -1) {
        if(errorType) *errorType = IEFSendError;
        close(sockfd);
        return errno;
    }

    // RECEIVE DATA
    receivedBytes = recv(sockfd, 
                         buffer, 
                         maxSize, 
                         0);
    if(receivedBytes == -1 || receivedBytes == 0) {
        if(errorType) *errorType = IEFReceiveError;
        close(sockfd);
        return receivedBytes;
    }

    // SET POINTERS
    if(errorType) *errorType = IEFNoError;
    if(rB) *rB = receivedBytes;
    return 0;
}

void *
IEFDownerGetInAddr(struct sockaddr *sa) {
    // IP4
    if(sa->sa_family == AF_INET)
        return &(((struct sockaddr_in *) sa)->sin_addr);

    // IP6
    return &(((struct sockaddr_in6 *) sa)->sin6_addr);
}

int
IEFDownerGetContentLengthOfPage(const char *httpHeader, int *contentLength) {
    register int i;
    int length;
    char *next;
    char *completeHeader;
    char *header;
    char *fieldName;

    // MAKE HEADER MUTABLE
    completeHeader = malloc(sizeof(char) * (strlen(httpHeader) + 1));
    strcpy(completeHeader, httpHeader);
    header = completeHeader;

    // SEARCH FOR LINE
    // loop for all lines
    next = header;
    do {
        header = next;

        // replace newline
        next = strstr(header, "\n");
        if(next == NULL) {
            free(header);
            return -1;
        }
        *next = '\0';
        for(i = 0; i != strlen("\n"); i++)
            next++;

    } while (strcasestr(header, "Content-Length:") == NULL);

    // SCAN INTEGER
    fieldName = strcasestr(header, "Content-Length:");
    for(i = 0; i != strlen("Content-Length:"); i++)
        fieldName++;

    sscanf(fieldName, "%d", &length);
    if(contentLength) *contentLength = length;

    free(completeHeader);
    return 0;
}

int
IEFDownerGetSizeOfHeader(const char *received, int *headerSize) {
    int length;
    int receivedLength;
    char *next;
    char *completeHeader;
    char *header;
    char *checkChar;
    int firstNonEmptyLineFound;
    int emptiesFound;

    // MAKE HEADER MUTABLE
    completeHeader = malloc(sizeof(char) * (strlen(received) + 1));
    strcpy(completeHeader, received);
    header = completeHeader;

    // SEARCH FOR FIRST NON EMPTY LINE
    receivedLength = strlen(header);
    firstNonEmptyLineFound = 0;
    for(next = header; *next != (char)NULL; next++) {
        printf("%c",*next);
        if(*next != '\n' && *next != '\r') {
            firstNonEmptyLineFound = 1;
            printf("\nFirst Non Empty Found\n\n");
            next++;
            break;
        }
    }
    if(firstNonEmptyLineFound == 0) {
        free(completeHeader);
        return -1;
    }

    // SEARCH FOR FIRST EMPTY LINE
    emptiesFound = 0;
    for(; *next != (char)NULL; next++) {
        checkChar = next;
        printf("%c", *checkChar);
        if(*checkChar == '\n' || *checkChar == '\r') {
            checkChar++;
            printf("%c", *checkChar);
            if(*checkChar == '\n' || *checkChar == '\r') {
                emptiesFound = 1;
                printf("Empty Line Found\n\n");
                break;
            }
        }
    }
    if(emptiesFound == 0) {
        free(completeHeader);
        return -1;
    }

    // GET END OF HEADER
    for(; *next != (char)NULL; next++) {
        printf("%c", *next);
        if(*next != '\n' && *next != '\r') {
            printf("End of header found");
            break;
        }
    }

    // INSERT NULL
    *next == '\0';
    length = strlen(header);

    if(headerSize) *headerSize = length;

    free(completeHeader);
    return 0;
}

main.c

#include <stdio.h>
#include <stdarg.h>
#include "IEFDowner.h"


#define SERVERNAME "developerief2.site11.com"
#define PROTOCOL "80"
#define FILENAME "http://developerief2.site11.com/welcome/welcome.php"
#define MAXHEADERSIZE (1024*1024)
#define DESTFILE "/Users/ief2/Desktop/file.png"

void errorOut(int status, const char *format, ...);

int main (int argc, const char * argv[]) {
    int status;             // return status
    void *headerData;       // header data
    int headerSize;         // size of header
    long rB;                // received bytes
    int pageSize;           // size of bytes of page

    // GET PAGE SIZE
    // get data
    headerData = malloc(1024);
    status = IEFDownerHTTPDownload(SERVERNAME, 
                                   FILENAME, 
                                   (void *)headerData, 
                                   1024 - 1, 
                                   &rB, NULL);
    if(status != 0)
        errorOut(status, "An error occured while downloading header info\n");

    // null terminate data
    ((char *)headerData)[rB] = '\0';

    // get size
    status = IEFDownerGetContentLengthOfPage((const char *)headerData, &pageSize);
    if(status != 0)
        errorOut(status, "An error occured while retreiving page size\n");

    printf("Page Size: %d\n", pageSize);
    printf("---------------------\n%s\n---------------------\n", headerData);

    // CALCULATE HEADER SIZE
    status = IEFDownerGetSizeOfHeader(headerData, &headerSize);
    if(status != 0)
        errorOut(status, "An error occured while getting the header size\n");
    printf("Header Size: %d\n", headerSize);

    return 0;
}

void errorOut(int status, const char *format, ...) {
    va_list args;
    va_start(args, format);
    vfprintf(stderr, format, args);
    va_end(args);
    exit(status);
}

The result I'm getting is not correct. I'm getting the end of the response after the first line already:

[Session started at 2010-08-03 21:32:47 +0000.]
Page Size: 3600
---------------------
HTTP/1.1 200 OK
Date: Tue, 03 Aug 2010 19:32:44 GMT
Server: Apache
X-Powered-By: PHP/5.2.11
Content-Length: 3600
Connection: close
Content-Type: text/html

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"&gt;

<html xml:lang="en">
<!-- HEAD -->
<head>
    <meta http-equiv="Content-type" content="text/html; charset=utf-16" />
    <meta name="description" content="A site containing quality freeware for Mac OS X Tiger and newer. All applications are made by a 14 year old developer who masters AppleScript Studio, Objective-C, HTML, PHP, CSS and JavaScript." />
    <meta name="keywords" content="free, freeware, mac os x, 10, tiger, leopard, mac os x, young, developer, freeware, AppleScript, Studio, Xcode, Objective-C, Cocoa" />
    <title>Free OSX Software :: Welcome</title>

    <!-- Shared Styles -->
    <link rel="stylesheet" href="../site-style/styles.css" type="text/css" media="screen" title="no title" charset="utf-8" />
</head>

<!-- PAGE CONTENTS -->
<body>

---------------------
H
First Non Empty Found

TTP/1.1 200 OK
Empty Line Found


DEnd of header foundHeader Size: 1023

The Debugger has exited with status 0.

I hope somebody can help me, ief2

A: 

It seems I have to read things better people write. Due to the previous question I should have already known that the new line in the HTTP responses are two characters: Carriage Return and a Line Feed ("\r\n"), so I should do a string compare.

New Code

int
IEFDownerGetSizeOfHeader(const char *received, int *headerSize) {
    int length;
    int receivedLength;
    char *next;
    char *completeHeader;
    char *header;
    char *checkChar;
    int firstNonEmptyLineFound;
    int emptiesFound;

    // MAKE HEADER MUTABLE
    completeHeader = malloc(sizeof(char) * (strlen(received) + 1));
    strcpy(completeHeader, received);
    header = completeHeader;

    // SEARCH FOR FIRST NON EMPTY LINE
    receivedLength = strlen(header);
    firstNonEmptyLineFound = 0;
    for(next = header; *next != (char)NULL; next++) {
        if(strncmp(next, "\r\n", 2) != 0) {
            firstNonEmptyLineFound = 1;
            next++;
            break;
        }
    }
    if(firstNonEmptyLineFound == 0) {
        free(completeHeader);
        return -1;
    }

    // SEARCH FOR FIRST EMPTY LINE
    emptiesFound = 0;
    for(; *next != (char)NULL; next++) {
        checkChar = next;
        if(strncmp(checkChar, "\r\n", 2) == 0) {
            checkChar++;
            checkChar++;
            if(strncmp(checkChar, "\r\n", 2) == 0) {
                emptiesFound = 1;
                next = checkChar;
                break;
            }
        }
    }
    if(emptiesFound == 0) {
        free(completeHeader);
        return -1;
    }

    // GET END OF HEADER
    for(; *next != (char)NULL; next++) {
        if(strncmp(next, "\r\n", 2) != 0) {
            break;
        }
        next++;
    }

    // INSERT NULL
    *next = '\0';
    length = strlen(header);

    if(headerSize) *headerSize = length;

    free(completeHeader);
    return 0;
}

I hope this can be useful to some people, ief2

Ief2