I want to read an XML file into a char *buffer
using C.
What is the best way to do this?
How should I get started?
I want to read an XML file into a char *buffer
using C.
What is the best way to do this?
How should I get started?
I think this would be a great question for Jeff Atwood to respond to. He's our resident expert on the C language.
You can use the stat() function to get the file size, then allocate a buffer with malloc() and read the file into it with fread().
The code will look something like this:
struct stat file_status;
char *buf = NULL;
FILE * pFile;
stat("tmp.xml", &file_status);
buf = (char*)malloc(file_status.st_size);
pFile = fopen ("tmp.xml","r");
fread (buf,1,file_status.st_size,pFile);
fclose(pFile);
Is reading the contents of the file into a single, simple buffer really what you want to do? XML files are generally there to be parsed, and you can do this with a library like libxml2, just to give one example (but notably, is implemented in C).
Here is a full program that reads in a whole XML file (really, any file), into a buffer. It includes about as much error-checking as would be useful.
N.B. everything is done in main(). Turning it into a callable function is left as an exercise for the reader.
(Tested, compiled with GCC 4.3.3. Switches were -Wall -W --pedantic --ansi.)
Comments on this will be addressed in approximately eight hours.
#include <stdio.h>
#include <stdlib.h>
/*
 * Read the file named by argv[1] into a heap buffer that grows in BUFSIZ
 * steps, NUL-terminate it, and print it with puts().
 *
 * Returns 0 on success. Calls exit(EXIT_FAILURE) if no filename is given,
 * the file cannot be opened, memory runs out, or a read error occurs.
 */
int main (int argc, char *argv[]) {
    char *buffer;        /* holds the file contents. */
    size_t i;            /* indexing into buffer. */
    size_t buffer_size;  /* size of the buffer. */
    char *temp;          /* for realloc(). */
    int c;               /* fgetc() result: must be int so EOF is distinct
                            from every valid byte value. */
    FILE *input;         /* our input stream. */

    if (argc < 2) {
        fprintf(stderr, "Needs a filename argument.\n");
        exit(EXIT_FAILURE);
    }
    else if (argc > 2) {
        fprintf(stderr, "Well, you passed in a few filenames, but I'm only using %s\n", argv[1]);
    }

    if ((input = fopen(argv[1], "r")) == NULL) {
        fprintf(stderr, "Error opening input file %s\n", argv[1]);
        exit(EXIT_FAILURE);
    }

    /* Initial allocation of buffer */
    i = 0;
    buffer_size = BUFSIZ;
    if ((buffer = malloc(buffer_size)) == NULL) {
        fprintf(stderr, "Error allocating memory (before reading file).\n");
        fclose(input);
        /* Must not fall through: the loop below would write through NULL. */
        exit(EXIT_FAILURE);
    }

    while ((c = fgetc(input)) != EOF) {
        /* Enlarge buffer if necessary. */
        if (i == buffer_size) {
            buffer_size += BUFSIZ;
            if ((temp = realloc(buffer, buffer_size)) == NULL) {
                fprintf(stderr, "Ran out of core while reading file.\n");
                fclose(input);
                free(buffer);
                exit(EXIT_FAILURE);
            }
            buffer = temp;
        }

        /* Add input char to the buffer. */
        buffer[i++] = (char)c;
    }

    /* Test if loop terminated from error. */
    if (ferror(input)) {
        fprintf(stderr, "There was a file input error.\n");
        free(buffer);
        fclose(input);
        exit(EXIT_FAILURE);
    }

    /* Make the buffer a bona-fide string: grow by one byte only when the
       data exactly filled the buffer and left no room for the '\0'. */
    if (i == buffer_size) {
        buffer_size += 1;
        if ((temp = realloc(buffer, buffer_size)) == NULL) {
            fprintf(stderr, "Ran out of core (and only needed one more byte too ;_;).\n");
            fclose(input);
            free(buffer);
            exit(EXIT_FAILURE);
        }
        buffer = temp;
    }
    buffer[i] = '\0';

    puts(buffer);

    /* Clean up. */
    free(buffer);
    fclose(input);

    return 0;
}
There are also a bunch of answers for a similar question at http://stackoverflow.com/questions/238603/how-can-i-get-a-files-size-in-c
Hopefully bug-free ISO-C code to read the contents of a file and add a '\0' char:
#include <stdlib.h>
#include <stdio.h>
/*
 * Report the size of an open stream in bytes.
 *
 * Seeks to the end, records the position reported by ftell(), then seeks
 * back to the start of the stream. Returns the size, or -1 if any of the
 * seek/tell calls fails. On success the stream is positioned at offset 0.
 */
long fsize(FILE * file)
{
    long length;

    if(fseek(file, 0, SEEK_END) != 0)
        return -1;

    length = ftell(file);

    /* Only rewind when ftell() succeeded; either failure yields -1. */
    if(length < 0 || fseek(file, 0, SEEK_SET) != 0)
        return -1;

    return length;
}
/*
 * Read the whole file `name` into a newly allocated, NUL-terminated buffer.
 *
 * str   - receives the buffer, or NULL if nothing could be read; the caller
 *         owns it and must free() it.
 * name  - path of the file to read (opened in binary mode).
 * error - optional (may be NULL); set to 1 if the file could not be opened,
 *         sized, allocated for, or read completely, and to 0 on full success.
 *
 * Returns the number of bytes stored in *str, not counting the terminator.
 */
size_t fget_contents(char ** str, const char * name, _Bool * error)
{
    FILE * file = NULL;
    size_t read = 0;
    *str = NULL;
    if(error) *error = 1;   /* pessimistic default; cleared only on success */

    do
    {
        file = fopen(name, "rb");
        if(!file) break;

        long size = fsize(file);
        if(size < 0) break;

        char * buf = malloc((size_t)size + 1);
        if(!buf) break;     /* error flag stays set */

        read = fread(buf, 1, (size_t)size, file);
        buf[read] = 0;

        if(read < (size_t)size)
        {
            /* Short read: try to give back the unused tail. Go through a
               temporary so a failed realloc() neither leaks nor loses the
               data already read. */
            char * shrunk = realloc(buf, read + 1);
            if(shrunk) buf = shrunk;
        }

        *str = buf;
        if(error) *error = ((size_t)size != read);
    }
    while(0);

    if(file) fclose(file);
    return read;
}
And if you want to parse XML, not just reading it into a buffer (something which would not be XML-specific, see Christoph's and Baget's answers), you can use for instance libxml2:
#include <stdio.h>
#include <string.h>
#include <libxml/parser.h>
/*
 * Parse the XML file named by argv[1] with libxml2 and print the root
 * element followed by each of its direct children (name and node type).
 *
 * Returns 0 on success, 1 if no filename was given, the file could not be
 * parsed, or the document has no root element.
 */
int main(int argc, char **argv) {
    xmlDoc *document;
    xmlNode *root, *node;
    const char *filename;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s filename.xml\n", argv[0]);
        return 1;
    }
    filename = argv[1];

    /* xmlReadFile() returns NULL when the file is missing or malformed. */
    document = xmlReadFile(filename, NULL, 0);
    if (document == NULL) {
        fprintf(stderr, "Could not parse %s\n", filename);
        return 1;
    }

    /* An empty document has no root element. */
    root = xmlDocGetRootElement(document);
    if (root == NULL) {
        fprintf(stderr, "%s has no root element\n", filename);
        xmlFreeDoc(document);
        xmlCleanupParser();
        return 1;
    }

    fprintf(stdout, "Root is <%s> (%i)\n", (const char *)root->name, (int)root->type);
    for (node = root->children; node; node = node->next) {
        fprintf(stdout, "\t Child is <%s> (%i)\n", (const char *)node->name, (int)node->type);
    }
    fprintf(stdout, "...\n");

    /* Release the parsed tree and libxml2's global state. */
    xmlFreeDoc(document);
    xmlCleanupParser();
    return 0;
}
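On a Unix machine, you typically compile the above with (note that the libraries must come after the source file, otherwise some linkers will not resolve the libxml2 symbols):
% gcc -o read-xml $(xml2-config --cflags) -Wall read-xml.c $(xml2-config --libs)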
I believe the question was about XML parsing and not about file reading; however, the OP should really clarify this.
Anyway, you now have plenty of examples of how to read a file.
Another option for XML parsing, in addition to sgm's suggestion, is the Expat library.
Memory-mapping the file has the potential to cut down on useless copying of the data. The trick is to ask the OS for what you want, instead of doing it yourself. Here's an implementation I made earlier:
#ifndef MMAP_H
#define MMAP_H
#include <sys/types.h>
/* Describes one contiguous memory region as a start address plus a length. */
struct region_t {
void *head; /* start of the region; do_mmap() stores mmap()'s result here */
off_t size; /* length in bytes; do_mmap() sets it from lseek(fd, 0, SEEK_END) */
};
/*
 * OUT_OF_BOUNDS(reg, p): non-zero when pointer p lies outside the half-open
 * range [reg->head, reg->head + reg->size). `reg` is a pointer to a region.
 * Pointers are compared as `const char *` because arithmetic on `void *`
 * is a compiler extension, not standard C. Both arguments are evaluated
 * more than once, so they must be free of side effects.
 */
#define OUT_OF_BOUNDS(reg, p) \
    (((const char *)(p) < (const char *)((reg)->head)) || \
     ((const char *)(p) >= (const char *)((reg)->head) + (reg)->size))
/*
 * REG_SHOW(reg): print the region's head, size and one-past-the-end address
 * with printf(); the including file must provide <stdio.h>. The macro
 * expands to a single expression with no trailing semicolon, so write
 * `REG_SHOW(r);` -- this keeps it safe inside if/else without braces.
 */
#define REG_SHOW(reg) \
    printf("h: %p, s: %lld (e: %p)\n", (void *)((reg)->head), \
           (long long)(reg)->size, \
           (void *)((char *)((reg)->head) + (reg)->size))
/*
 * Map the file named `fn` read-only (PROT_READ, MAP_PRIVATE) and return a
 * heap-allocated region_t describing the mapping. Returns NULL if the
 * allocation, open() or lseek() fails. The caller owns the returned struct.
 */
struct region_t *do_mmap(const char *fn);
#endif
#include <stdlib.h>
#include <sys/types.h> /* open lseek */
#include <sys/stat.h> /* open */
#include <fcntl.h> /* open */
#include <unistd.h> /* lseek */
#include <sys/mman.h> /* mmap */
#include "mmap.h"
/*
 * Map the whole file `fn` into memory, read-only and private.
 *
 * Returns a calloc()ed region_t whose head/size describe the mapping, or
 * NULL if allocation, open(), lseek() or mmap() fails. A zero-length file
 * also yields NULL, since mmap() rejects a length of 0. The caller owns
 * the result: munmap(head, size) the mapping, then free() the struct.
 */
struct region_t *do_mmap(const char *fn)
{
    struct region_t *R;
    int fd;

    R = calloc(1, sizeof *R);
    if(R == NULL)
        return NULL;

    fd = open(fn, O_RDONLY);
    if(fd == -1)
        goto fail_free;

    /* Seeking to the end yields the file size. */
    R->size = lseek(fd, 0, SEEK_END);
    if(R->size == -1)
        goto fail_close;

    /* mmap() signals failure with MAP_FAILED, not NULL. */
    R->head = mmap(NULL, (size_t)R->size, PROT_READ, MAP_PRIVATE, fd, 0);
    if(R->head == MAP_FAILED)
        goto fail_close;

    close(fd); /* don't need the file descriptor anymore. */
    return R;

fail_close:
    close(fd); /* clean up after failed lseek or mmap */
fail_free:
    free(R);   /* clean up after failed open, lseek or mmap */
    return NULL;
}