libxml2 error with namespaces and xpath

cateof picture cateof · Jun 28, 2010 · Viewed 13.1k times · Source

I am pasting some code here that compiles with no warning using gcc file.c -lxml2, assuming that libxml2 is installed in your system.

#include <libxml/parser.h>
#include <libxml/xpath.h>
#include <assert.h>
#include <libxml/tree.h>
#include <libxml/xpathInternals.h>

xmlDocPtr
getdoc (char *docname) {
    xmlDocPtr doc;
    doc = xmlParseFile(docname);

    if (doc == NULL ) {
        fprintf(stderr,"Document not parsed successfully. \n");
        return NULL;
    }

    return doc;
}

xmlXPathObjectPtr
getnodeset (xmlDocPtr doc, xmlChar *xpath){

    xmlXPathContextPtr context;
    xmlXPathObjectPtr result;

    context = xmlXPathNewContext(doc);
    if (context == NULL) {
        printf("Error in xmlXPathNewContext\n");
        return NULL;
    }

    if(xmlXPathRegisterNs(context,  BAD_CAST "new", BAD_CAST "http://www.example.com/new") != 0) {
        fprintf(stderr,"Error: unable to register NS with prefix");
        return NULL;
    }

    result = xmlXPathEvalExpression(xpath, context);
    xmlXPathFreeContext(context);
    if (result == NULL) {
        printf("Error in xmlXPathEvalExpression\n");
        return NULL;
    }
    if(xmlXPathNodeSetIsEmpty(result->nodesetval)){
        xmlXPathFreeObject(result);
                printf("No result\n");
        return NULL;
    }
    return result;
}

int
main(int argc, char **argv) {

    char *docname;
    xmlDocPtr doc;
    xmlChar *xpath = (xmlChar*) "/new:book/section1";
    xmlNodeSetPtr nodeset;
    xmlXPathObjectPtr result;
    int i;
    xmlChar *keyword;

    if (argc <= 1) {
        printf("Usage: %s docname\n", argv[0]);
        return(0);
    }

    docname = argv[1];
    doc = getdoc(docname);
    result = getnodeset (doc, xpath);
    if (result) {
        nodeset = result->nodesetval;
        for (i=0; i < nodeset->nodeNr; i++) {
            keyword = xmlNodeListGetString(doc, nodeset->nodeTab[i]->xmlChildrenNode, 1);
        printf("keyword: %s\n", keyword);
        xmlFree(keyword);
        }
        xmlXPathFreeObject (result);
    }

    xmlFreeDoc(doc);
    xmlCleanupParser();
    return (1);
}

My problem is that I want to parse the following xml

<?xml version="1.0" encoding="UTF-8"?>
<book xmlns="http://www.example.com/new">
    <section1>Sec_1</section1>
    <section2>Sec_2</section2>
</book>

the book element defines a namespace inside that element. I want to print the value in the xpath /book/section1 and it returns NULL. When I am trying to return the element under a namespace I also get errors, ie /new:book/section1

I assume that my code fails because I am not using correctly the namespace prefixes. I run out of time. Could you please help?

Answer

dmaestro12 picture dmaestro12 · Aug 12, 2010

Turns out, as I found out from here, it is not really a failure of libXml, it's a problem because libXml correctly follows the XML/XPATH specifications.

The solutions proposed by R Bourdeau are correct, however, if you have control of the xml document you are parsing.

The context for the XPATH query is independent of the namespace qualifiers in the xml document. The default namespace forces all child tags into a namespace; they don't require qualification in the document but must be qualified in the xpath query. Fortunately, you registered the namespace as new with libXml, so cateof's solution should work.

xmlXPathRegisterNs(context,  BAD_CAST "new", BAD_CAST "http://www.example.com/new"

xmlChar *xpath = (xmlChar*) "/new:book/new:section1";

I'm inlining the xml here for visibility:

<?xml version="1.0" encoding="UTF-8"?>
<book xmlns="http://www.example.com/new">
    <section1>Sec_1</section1>
    <section2>Sec_2</section2>
</book>