tags:

views:

3610

answers:

10

I want to read an XML file into a char *buffer using C.

What is the best way to do this?

How should I get started?

A: 

I think this would be a great question for Jeff Atwood to respond to. He's our resident expert on the C language.

Micah
This might be the funniest comment on SO. :)
BobbyShaftoe
cool. How about +1 for hilarity =)
Micah
See, this would be a good use for a comment. This is not an answer.
Geoffrey Chetwood
It wouldn't be as visible and funny though.
Micah
@micah: We are going for quality in answers... not comedy.
Geoffrey Chetwood
@Rich B: Yes, thank you; we SO users should take ourselves VERY seriously.
Micah
+2  A: 

You can use the stat() function to get the file size, then allocate a buffer with malloc, and finally read the file into it using fread.

The code will look something like this:

struct stat file_status;
char *buf = NULL;
FILE * pFile;

stat("tmp.xml", &file_status);
buf = (char*)malloc(file_status.st_size);
pFile = fopen ("tmp.xml","r");
fread (buf,1,file_status.st_size,pFile);

fclose(pFile);
Baget
You shouldn't cast malloc(), and should be checking return values. Additionally, stat() is non-standard (I mean the ISO C standard).
Cirno de Bergerac
@sgm: Revise the code and post it, along with your changes and why you made them. I would be happy to vote it up.
Geoffrey Chetwood
@Rich B: Go crazy.
Cirno de Bergerac
The code is c code. Casting malloc is correct.
EvilTeach
The answer is correct but probably not what the OP wanted :-) He probably wanted to *parse* the XML file.
bortzmeyer
@sgm: casting malloc is only incorrect in C90. You should cast it to be compatible with C++, and there's no danger in C99 since implicit function declarations are no longer legal.
Adam Rosenfield
+5  A: 

Is reading the contents of the file into a single, simple buffer really what you want to do? XML files are generally there to be parsed, and you can do this with a library like libxml2, just to give one example (but notably, is implemented in C).

Cirno de Bergerac
That's interesting, thanks.
pngaz
A: 

Here is a full program that reads in a whole XML file (really, any file), into a buffer. It includes about as much error-checking as would be useful.

N.B. everything is done in main(). Turning it into a callable function is left as an exercise for the reader.

(Tested, compiled with GCC 4.3.3. Switches were -Wall -W --pedantic --ansi.)

Comments on this will be addressed in approximately eight hours.

#include <stdio.h>
#include <stdlib.h>


/*
 * Read the file named by argv[1] into a growable heap buffer, terminate it
 * with '\0', and print it with puts().
 *
 * Returns 0 on success; exits with EXIT_FAILURE when no filename is given,
 * the file cannot be opened, memory runs out, or a read error occurs.
 */
int main (int argc, char *argv[]) {
    char   *buffer;        /* holds the file contents. */
    size_t  i;             /* indexing into buffer. */
    size_t  buffer_size;   /* size of the buffer. */
    char   *temp;          /* for realloc(). */
    int     c;             /* fgetc() result; must be int so EOF is distinguishable from any byte. */
    FILE   *input;         /* our input stream. */

    if (argc < 2) {
        fprintf(stderr, "Needs a filename argument.\n");
        exit(EXIT_FAILURE);
    }
    else if (argc > 2) {
        fprintf(stderr, "Well, you passed in a few filenames, but I'm only using %s\n", argv[1]);
    }

    if ((input = fopen(argv[1], "r")) == NULL) {
        fprintf(stderr, "Error opening input file %s\n", argv[1]);
        exit(EXIT_FAILURE);
    }

    /* Initial allocation of buffer */
    i = 0;
    buffer_size = BUFSIZ;
    if ((buffer = malloc(buffer_size)) == NULL) {
        fprintf(stderr, "Error allocating memory (before reading file).\n");
        fclose(input);
        /* Without this exit the loop below would write through a null pointer. */
        exit(EXIT_FAILURE);
    }

    while ((c = fgetc(input)) != EOF) {
        /* Enlarge buffer if necessary. */
        if (i == buffer_size) {
            buffer_size += BUFSIZ;
            /* realloc() into a temporary so the old block can still be freed on failure. */
            if ((temp = realloc(buffer, buffer_size)) == NULL) {
                fprintf(stderr, "Ran out of core while reading file.\n");
                fclose(input);
                free(buffer);
                exit(EXIT_FAILURE);
            }
            buffer = temp;
        }

        /* Add input char to the buffer. */
        buffer[i++] = (char)c;
    }

    /* Test if loop terminated from error. */
    if (ferror(input)) {
        fprintf(stderr, "There was a file input error.\n");
        free(buffer);
        fclose(input);
        exit(EXIT_FAILURE);
    }

    /* Make the buffer a bona-fide string: grow by one byte if it is exactly full. */
    if (i == buffer_size) {
        buffer_size += 1;
        if ((temp = realloc(buffer, buffer_size)) == NULL) {
            fprintf(stderr, "Ran out of core (and only needed one more byte too ;_;).\n");
            fclose(input);
            free(buffer);
            exit(EXIT_FAILURE);
        }
        buffer = temp;
    }
    buffer[i] = '\0';

    puts(buffer);

    /* Clean up. */
    free(buffer);
    fclose(input);

    return 0;
}
Cirno de Bergerac
A: 

There are also a bunch of answers for a similar question at http://stackoverflow.com/questions/238603/how-can-i-get-a-files-size-in-c

Ben Combee
+1  A: 

Hopefully bug-free ISO-C code to read the contents of a file and add a '\0' char:

#include <stdlib.h>
#include <stdio.h>

/*
 * Report the end-of-file position of `file` as given by ftell().
 *
 * The stream is moved to the end to take the measurement and then rewound
 * to the beginning, so any previous position is lost.
 *
 * Returns the position, or -1 if either seek or the ftell() call fails.
 */
long fsize(FILE * file)
{
    long length;

    if (fseek(file, 0, SEEK_END) != 0) {
        return -1;
    }

    length = ftell(file);

    /* The rewind is only attempted once the length is known to be valid. */
    if (length < 0 || fseek(file, 0, SEEK_SET) != 0) {
        return -1;
    }

    return length;
}

/*
 * Read the whole file `name` into a newly allocated, NUL-terminated buffer.
 *
 * str   - receives the buffer, or NULL if nothing could be read; the caller
 *         owns it and must free() it.
 * name  - path of the file, opened in binary mode.
 * error - optional (may be NULL); set to 1 if the file could not be opened,
 *         measured, allocated for, or read in full, and to 0 on complete
 *         success.
 *
 * Returns the number of bytes stored in *str, not counting the terminator.
 */
size_t fget_contents(char ** str, const char * name, _Bool * error)
{
    FILE * file;
    char * buffer;
    size_t count = 0;
    long size;

    *str = NULL;
    if(error) *error = 1;   /* stays set until the read has fully succeeded */

    file = fopen(name, "rb");
    if(!file) return 0;

    size = fsize(file);
    if(size < 0) goto done;

    buffer = malloc((size_t)size + 1);
    if(!buffer) goto done;

    count = fread(buffer, 1, (size_t)size, file);
    buffer[count] = 0;

    if(count < (size_t)size)
    {
        /* Short read: give back the unused tail. If realloc() fails the
           original block is still valid, so keep it rather than leaking it. */
        char * shrunk = realloc(buffer, count + 1);
        if(shrunk) buffer = shrunk;
    }

    *str = buffer;
    if(error) *error = (count != (size_t)size);

done:
    fclose(file);
    return count;
}
Christoph
A: 

And if you want to parse XML, not just reading it into a buffer (something which would not be XML-specific, see Christoph's and Baget's answers), you can use for instance libxml2:

#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>

/*
 * Parse the XML file named by argv[1] with libxml2 and print the root
 * element followed by each of its direct children (name and node type).
 *
 * Returns 0 on success, 1 if no filename was given, the file could not be
 * parsed, or the document has no root element.
 */
int main(int argc, char **argv) {
   xmlDoc *document;
   xmlNode *root, *node;
   const char *filename;

   if (argc < 2) {
     fprintf(stderr, "Usage: %s filename.xml\n", argv[0]);
     return 1;
   }
   filename = argv[1];

   /* xmlReadFile() yields NULL when the file is missing or not well-formed. */
   document = xmlReadFile(filename, NULL, 0);
   if (document == NULL) {
     fprintf(stderr, "Could not parse %s\n", filename);
     xmlCleanupParser();
     return 1;
   }

   /* An empty document has no root element to inspect. */
   root = xmlDocGetRootElement(document);
   if (root == NULL) {
     fprintf(stderr, "%s has no root element\n", filename);
     xmlFreeDoc(document);
     xmlCleanupParser();
     return 1;
   }

   fprintf(stdout, "Root is <%s> (%i)\n", root->name, root->type);
   for (node = root->children; node; node = node->next) {
     fprintf(stdout, "\t Child is <%s> (%i)\n", node->name, node->type);
   }
   fprintf(stdout, "...\n");

   /* Release the parsed tree and the parser's global state. */
   xmlFreeDoc(document);
   xmlCleanupParser();
   return 0;
}

On a Unix machine, you typically compile the above with:

% gcc -o read-xml $(xml2-config --cflags) -Wall $(xml2-config --libs) read-xml.c
bortzmeyer
A: 

I believe the question was about XML parsing and not about file reading; however, the OP should really clarify this.
Either way, you now have plenty of examples of how to read a file.
Another option for XML parsing, in addition to sgm's suggestion, is the Expat library.

Ilya
A: 

Suggestion: Use memory mapping

This has the potential to cut down on useless copying of the data. The trick is to ask the OS for what you want, instead of doing it. Here's an implementation I made earlier:

mmap.h

#ifndef MMAP_H
#define MMAP_H

#include <sys/types.h>

/* A contiguous span of memory, as filled in by do_mmap(). */
struct region_t {
  void *head;   /* address of the first byte */
  off_t size;   /* length of the span in bytes */
};

/*
 * Nonzero when pointer p lies outside the half-open range
 * [reg->head, reg->head + reg->size). `reg` is a struct region_t pointer.
 * Arithmetic is done on char pointers because arithmetic on void * is a
 * compiler extension, not ISO C. Both arguments are evaluated more than once.
 */
#define OUT_OF_BOUNDS(reg, p) \
  (((const char *)(p) < (const char *)(reg)->head) || \
   ((const char *)(p) >= (const char *)(reg)->head + (reg)->size))

/*
 * Print head, size and one-past-the-end address of a region to stdout.
 * Requires <stdio.h> at the point of use. The expansion keeps its trailing
 * semicolon so existing call sites written without one still compile.
 */
#define REG_SHOW(reg) \
  printf("h: %p, s: %lld (e: %p)\n", (void *)(reg)->head, \
         (long long)(reg)->size, \
         (void *)((char *)(reg)->head + (reg)->size));

/*
 * Map the file named fn read-only and return a heap-allocated region
 * describing the mapping, or NULL on failure.
 */
struct region_t *do_mmap(const char *fn);
#endif

mmap.c

#include <stdlib.h>

#include <sys/types.h>  /* open lseek             */
#include <sys/stat.h>   /* open                   */
#include <fcntl.h>      /* open                   */
#include <unistd.h>     /*      lseek             */
#include <sys/mman.h>   /*            mmap        */

#include "mmap.h"

/*
 * Map the file named fn into memory, read-only and private.
 *
 * Returns a heap-allocated region_t whose head is the mapping address and
 * whose size is the file length reported by lseek(), or NULL if allocation,
 * open(), lseek() or mmap() fails. An empty file also yields NULL, because
 * mmap() rejects a zero-length mapping.
 *
 * The file descriptor is closed before returning in every case.
 */
struct region_t *do_mmap(const char *fn)
{
  struct region_t *R = calloc(1, sizeof(struct region_t));
  int fd;

  if(R == NULL)
    return NULL;

  fd = open(fn, O_RDONLY);
  if(fd == -1)
    goto fail_free;

  R->size = lseek(fd, 0, SEEK_END);
  if(R->size == -1)
    goto fail_close;

  /* mmap() signals failure with MAP_FAILED, not NULL. */
  R->head = mmap(NULL, (size_t)R->size, PROT_READ, MAP_PRIVATE, fd, 0);
  if(R->head == MAP_FAILED)
    goto fail_close;

  close(fd); /* don't need the file descriptor anymore. */
  return R;

fail_close:
  close(fd); /* clean up after failed lseek or mmap */
fail_free:
  free(R);   /* clean up after failed open, lseek or mmap */
  return NULL;
}
Anders Eurenius
A: 

Is there a way to parse XML data once it has been read into the buffer? I mean parsing the XML data FROM the buffer and NOT from the file.

Thanks!