tags:

views:

193

answers:

1

I'm loading an XML file in JavaScript, and once I've done that, I'm iterating through all of the Document's childNodes, which are all of the XML nodes in the document. The problem I'm having is that I need to ignore nodes that are not real elements but merely whitespace (newlines, tabs, etc.). Right now I'm doing the following:

// Walk every child node. Element nodes are handled on their own; text and
// comment nodes are only processed when they contain something other than
// whitespace (as decided by nodeEmpty).
for (var i = 0; i < childList.length; i++)
{
    switch (childList[i].nodeType)
    {
        case 1: // Node.ELEMENT_NODE
            /* snip */
            break;
        case 3: // Node.TEXT_NODE
        case 8: // Node.COMMENT_NODE
            // Ensure the node is a valid node, and not newlines, tabs, etc
            if (!nodeEmpty(childList[i].nodeValue))
            {
                // do stuff
            }
            break;
    }
}

/**
 * Reports whether a node value is made up solely of spaces, newlines and
 * tabs. A zero-length value counts as empty.
 *
 * @param {String} nodeValue the text to inspect
 * @returns {Boolean} true when no other character is present
 */
function nodeEmpty(nodeValue)
{
    var total = nodeValue.length;
    var pos = 0;

    while (pos < total)
    {
        var ch = nodeValue[pos];
        var isBlank = (ch === ' ' || ch === '\n' || ch === '\t');

        // The first non-blank character settles the answer.
        if (!isBlank)
        {
            return false;
        }
        pos++;
    }

    return true;
}

But this seems like a very inelegant way of achieving this. Is there a better way of doing it?

+1  A: 
/**
 * xmlsimplify simplifies an XML object by copying it into a
 * javascript object with only a subset of the attributes of the original.
 * This makes it suitable for output as JSON, or for further processing by
 * other functions.
 *
 * Result shape, by node type:
 *  - element (1): an object with `nodeName`, one property per attribute, and
 *    a `childNodes` array (omitted when no child survives);
 *  - attribute (2): a single-property object `{ name: value }`;
 *  - text (3) / CDATA (4): the node's string value;
 *  - document (9): the simplified root element;
 *  - doctype (10): the simplified next sibling;
 *  - any other node type (e.g. comments): an empty object.
 *
 * @param {Object|String} xml a DOM node, or a string that is handed to
 *     getXmlDocument (defined outside this block; presumably it parses the
 *     string into a Document -- confirm against its definition)
 * @param {Boolean} strip if true, strip empty (whitespace) nodes
 * @returns {Object|String|null|undefined} the simplified structure; null when
 *     there is no node to simplify or a whitespace text node was stripped;
 *     undefined when the value has no nodeType
 */
var xmlsimplify = function(xml, strip) {
    var source = xml;
    if (typeof source === "string") {
        source = getXmlDocument(source);
    }
    var hasOwn = Object.prototype.hasOwnProperty;
    var traverse = function(node) {
        var i, l, n, a, j, o, children;
        // A missing node (empty document, doctype without a following
        // sibling) simplifies to null instead of throwing.
        if (node === null || node === undefined) {
            return null;
        }
        if (!node.nodeType) {
            return undefined;
        }
        o = {};
        switch (node.nodeType) {
        case 1:
            //element node;
            o = {
                nodeName: node.nodeName
            }; //record nodename
            for (i = 0, l = node.attributes.length, n = node.attributes; i < l; i++) { //append attributes
                a = traverse(n.item(i));
                for (j in a) {
                    if (hasOwn.call(a, j)) {
                        o[j] = a[j];
                    }
                }
            }
            if (node.childNodes.length) {
                children = [];
                for (i = 0, l = node.childNodes.length, n = node.childNodes; i < l; i++) {
                    a = traverse(n.item(i));
                    // null marks a stripped whitespace text node
                    if (a !== null) {
                        children.push(a);
                    }
                }
                // only expose childNodes when at least one child survived
                if (children.length !== 0) {
                    o.childNodes = children;
                }
            }
            break;
        case 2:
            //attribute node
            o[node.nodeName] = node.nodeValue; //return an attribute object
            break;
        case 3:
            //text node
            //strip empty nodes: only when asked to, and only when the text
            //holds no non-whitespace character
            if (strip === true && !node.nodeValue.match(/[^\s]/)) {
                o = null;
            } else {
                o = node.nodeValue;
            }
            break;
        case 4:
            //cdata section node
            o = node.nodeValue;
            break;
        case 9:
            //document node; prefer the root element so that a leading
            //comment or processing instruction does not become the result
            o = traverse(node.documentElement || node.firstChild);
            break;
        case 10:
            //doctype node; carry on with whatever follows it
            o = traverse(node.nextSibling);
            break;
        }
        return o;
    };
    return traverse(source);
};
Breton
Why does the regex use "^\s" instead of "\S"?
Raul Agrait
I suppose the two are equivalent, but I find the former to be clearer. I'm a bit hazy on my character classes, and code clarity is generally important. I might know today that \S is any non-whitespace character, but I may forget it again in six months' time.
Breton