views:

227

answers:

3

hello,

I am pasting some code here that compiles with no warnings using gcc file.c -lxml2, assuming that libxml2 is installed on your system.

#include <assert.h>
#include <stdio.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>

/*
 * Parse the XML file named by docname.
 *
 * Returns the parsed document on success; the caller owns it and releases it
 * with xmlFreeDoc().  On failure a message is written to stderr and NULL is
 * returned.
 */
xmlDocPtr
getdoc (char *docname) {
    xmlDocPtr parsed = xmlParseFile(docname);

    if (parsed != NULL) {
        return parsed;
    }

    /* Parsing failed: report it and hand NULL back to the caller. */
    fprintf(stderr,"Document not parsed successfully. \n");
    return NULL;
}

/*
 * Evaluate an XPath expression against a parsed document.
 *
 * Before evaluation the prefix "new" is bound to the namespace URI
 * "http://www.example.com/new", so the expression can address elements in
 * that namespace as new:name.
 *
 * doc   - document to query.
 * xpath - XPath expression to evaluate.
 *
 * Returns the evaluation result when it holds a non-empty node set; the
 * caller releases it with xmlXPathFreeObject().  Returns NULL, after printing
 * a message, when the context cannot be created, the namespace cannot be
 * registered, the evaluation fails, or the node set is empty.
 */
xmlXPathObjectPtr
getnodeset (xmlDocPtr doc, xmlChar *xpath){

    xmlXPathContextPtr context;
    xmlXPathObjectPtr result;

    context = xmlXPathNewContext(doc);
    if (context == NULL) {
        printf("Error in xmlXPathNewContext\n");
        return NULL;
    }

    if(xmlXPathRegisterNs(context,  BAD_CAST "new", BAD_CAST "http://www.example.com/new") != 0) {
        fprintf(stderr,"Error: unable to register NS with prefix");
        /* The context was allocated above; release it on this error path too. */
        xmlXPathFreeContext(context);
        return NULL;
    }

    result = xmlXPathEvalExpression(xpath, context);
    /* The context is not used after the evaluation, so free it right away. */
    xmlXPathFreeContext(context);
    if (result == NULL) {
        printf("Error in xmlXPathEvalExpression\n");
        return NULL;
    }
    if(xmlXPathNodeSetIsEmpty(result->nodesetval)){
        /* An empty match is reported to the caller as NULL, not as an empty set. */
        xmlXPathFreeObject(result);
        printf("No result\n");
        return NULL;
    }
    return result;
}

int
main(int argc, char **argv) {

    char *docname;
    xmlDocPtr doc;
    xmlChar *xpath = (xmlChar*) "/new:book/section1";
    xmlNodeSetPtr nodeset;
    xmlXPathObjectPtr result;
    int i;
    xmlChar *keyword;

    if (argc nodesetval;
        for (i=0; i nodeNr; i++) {
            keyword = xmlNodeListGetString(doc, nodeset->nodeTab[i]->xmlChildrenNode, 1);
        printf("keyword: %s\n", keyword);
        xmlFree(keyword);
        }
        xmlXPathFreeObject (result);
    }

    xmlFreeDoc(doc);
    xmlCleanupParser();
    return (1);
}

My problem is that I want to parse the following xml file

http://pastebin.com/uv2GJxqk

The book element declares a namespace on itself. I want to print the value at the XPath /book/section1, but the query returns NULL. When I try to select the element through the namespace prefix instead, i.e. /new:book/section1, I also get errors.

I assume that my code fails because I am not using the namespace prefixes correctly. I am running out of time. Could you please help?

A: 

It is an issue with the default namespace. To match a path you need to qualify every step with the prefix: /new:tag/new:tag and so on.

cateof
A: 

This is an annoying failure of the libXml library. As noted by cateof, the problem is the default namespace declaration:

xmlns="http://www.example.com/new"

Two choices:
(1) get rid of that declaration in your book tag or (2) give it a name, and use that name in your tags.

e.g.

xmlns:new="http://www.example.com/new"

Then your tags all look like:

new:book new:section1

and so on.

Robert H. Bourdeau
+1  A: 

Turns out, as I found out from here, it is not really a failure of libXml, it's a problem because libXml correctly follows the XML/XPATH specifications.

The solutions proposed by R Bourdeau are correct, but only if you have control of the XML document you are parsing.

The context for the XPATH query is independent of the namespace qualifiers in the xml document. The default namespace forces all child tags into a namespace; they don't require qualification in the document but must be qualified in the xpath query. Fortunately, you registered the namespace as new with libXml, so cateof's solution should work.

xmlXPathRegisterNs(context,  BAD_CAST "new", BAD_CAST "http://www.example.com/new");

xmlChar *xpath = (xmlChar*) "/new:book/new:section1";

I'm inlining the xml here for visibility:

<?xml version="1.0" encoding="UTF-8"?>
<book xmlns="http://www.example.com/new">
    <section1>Sec_1</section1>
    <section2>Sec_2</section2>
</book>