ansaurus

Question

getElementsByTagName() equivalent for textNodes

Answer 1

+1 A:

Check this out:
http://refactormycode.com/codes/341-jquery-all-descendent-text-nodes-within-a-node.

Also, check out the nodeType property of the DOM.

Alex Ciminian 2010-04-05 17:05:50

Thanks - this solution walks the DOM manually though, so it's not really the "equivalent" of `getElementsByTagName()`

levik 2010-04-05 22:01:51

Answer 2

+3 A:

Update:

I have outlined some basic performance tests for each of these 6 methods over 1000 runs. getElementsByTagName is the fastest, but it does an incomplete job: it does not select all elements, only one particular type of tag (`p`), and it blindly assumes that the element's firstChild is a text node. It may be a little flawed, but it is there for demonstration purposes and for comparing its performance to TreeWalker. Run the tests yourself on jsfiddle to see the results.

Using a TreeWalker
Custom Iterative Traversal
Custom Recursive Traversal
Xpath query
querySelectorAll
getElementsByTagName

Let's assume for a moment that there is a method that allows you to get all Text nodes natively. You would still have to traverse each resulting text node and call node.nodeValue to get the actual text as you would do with any DOM Node. So the issue of performance is not with iterating through text nodes, but iterating through all nodes that are not text and checking their type. I would argue (based on the results) that TreeWalker performs just as fast as getElementsByTagName, if not faster (even with getElementsByTagName playing handicapped).

Ran each test 1000 times.

Method                  Total ms        Average ms
--------------------------------------------------
document.TreeWalker          816             0.816
Iterative Traverser         2518             2.518
Recursive Traverser        21556            21.556
XPath query                 5087             5.087
querySelectorAll            5352             5.352
getElementsByTagName         660              0.66

Source for each method:

TreeWalker

/**
 * Benchmark body: collects the text of every text node under
 * document.body using the browser's native TreeWalker.
 *
 * The walker is created with NodeFilter.SHOW_TEXT, so nextNode() only
 * yields text nodes. The collected strings live in a local array that
 * is not returned; the function exists only to be timed.
 */
function nativeTreeWalker() {
    var textWalker = document.createTreeWalker(
        document.body,
        NodeFilter.SHOW_TEXT,
        null,   // no custom filter
        false   // legacy fourth argument of createTreeWalker
    );
    var collected = [];

    // nextNode() returns null once the subtree is exhausted.
    for (var current = textWalker.nextNode(); current; current = textWalker.nextNode()) {
        collected.push(current.nodeValue);
    }
}

Recursive Tree Traversal

/**
 * Benchmark body: collects the text of every text node under
 * document.body with a hand-written recursive depth-first walk.
 *
 * The collected strings live in a local array that is not returned;
 * the function exists only to be timed.
 */
function customRecursiveTreeWalker() {
    var texts = [];

    // Visits the children of `parent` in order, descending into anything
    // that is not a text node (nodeType 3).
    function descend(parent) {
        var kids = parent.childNodes;
        for (var idx = 0; idx < kids.length; idx += 1) {
            var kid = kids[idx];
            if (kid.nodeType != 3) {
                descend(kid);
                continue;
            }
            texts.push(kid.nodeValue);
        }
    }

    descend(document.body);
}

Iterative Tree Traversal

/**
 * Collects the text of every text node under document.body with a
 * hand-written, non-recursive depth-first walk that follows
 * firstChild / nextSibling / parentNode links.
 *
 * @returns {Array} nodeValue of each text node, in document order.
 */
function customIterativeTreeWalker() {
    var result = [];
    var root = document.body;

    var node = root.firstChild;
    while (node != null) {
        // Compare, don't assign: only text nodes (nodeType 3) are collected.
        if (node.nodeType === 3) {
            result.push(node.nodeValue);
        }

        if (node.hasChildNodes()) {
            node = node.firstChild;
            continue;
        }

        // Leaf: climb until an ancestor has a following sibling, but never
        // climb past the root of the walk.
        while (node !== root && node.nextSibling == null) {
            node = node.parentNode;
        }

        // Arriving back at root means the subtree is exhausted. root's own
        // siblings lie outside the subtree and must not be visited.
        node = (node === root) ? null : node.nextSibling;
    }

    return result;
}

querySelectorAll

/**
 * Collects the text of every text node under document.body by asking
 * querySelectorAll for all descendant elements and scanning the direct
 * children of document.body and of each of those elements.
 *
 * Every child is inspected, not just the first, so text that follows an
 * inline element (the "!" in <p>Hello <b>x</b>!</p>) and text placed
 * directly inside <body> are included.
 *
 * @returns {Array} nodeValue of each text node, grouped by parent
 *     element (document.body first, then elements in the order
 *     querySelectorAll returned them).
 */
function nativeSelector() {
    var elements = document.querySelectorAll("body *");
    var results = [];

    // Appends the nodeValue of each direct text-node child of `parent`.
    function collectTextChildren(parent) {
        var children = parent.childNodes;
        for (var j = 0; j < children.length; j++) {
            if (children[j].nodeType === 3) {
                results.push(children[j].nodeValue);
            }
        }
    }

    // "body *" does not match <body> itself, so handle it explicitly.
    collectTextChildren(document.body);
    for (var i = 0; i < elements.length; i++) {
        collectTextChildren(elements[i]);
    }

    return results;
}

getElementsByTagName (handicap)

/**
 * Deliberately limited ("handicapped") baseline: looks only at <p>
 * elements and only at each one's first child.
 *
 * A <p> with no children, or whose first child is not a text node
 * (nodeType 3), is skipped instead of throwing or contributing a null.
 *
 * @returns {Array} nodeValue of the leading text node of each <p> that
 *     has one.
 */
function getElementsByTagName() {
    var elements = document.getElementsByTagName("p");
    var results = [];
    for (var i = 0; i < elements.length; i++) {
        var first = elements[i].childNodes[0];
        // childNodes[0] is undefined for an empty <p>.
        if (first && first.nodeType === 3) {
            results.push(first.nodeValue);
        }
    }
    return results;
}

XPath

/**
 * Collects the text of every text node under document.body with a
 * single XPath query.
 *
 * The query is evaluated relative to document.body (".//text()") so it
 * covers the same subtree as the other traversal functions, and the
 * nodeValue of each match is collected rather than the node itself.
 *
 * @returns {Array} nodeValue of each matching text node, in the order
 *     the ordered iterator yields them.
 */
function xpathSelector() {
    var xpathResult = document.evaluate(
        ".//text()",
        document.body,
        null,
        XPathResult.ORDERED_NODE_ITERATOR_TYPE,
        null
    );

    var results = [];
    var res;
    // iterateNext() returns null when the iterator is exhausted.
    while ((res = xpathResult.iterateNext())) {
        results.push(res.nodeValue);
    }

    return results;
}

Also, you might find this discussion helpful - http://bytes.com/topic/javascript/answers/153239-how-do-i-get-elements-text-node

Anurag 2010-04-05 17:31:27

Interesting... Does `createTreeWalker()` work on IE?

levik 2010-04-06 22:43:31

I have gotten mixed results for each of the methods above in different browsers - the results above are for Chrome. Firefox and Safari behave very differently. I don't have access to IE unfortunately, but you could test these yourself on IE to see if it works. As for browser optimization, I wouldn't worry about picking a different method for each browser as long as the differences are on the order of tens of milliseconds or maybe even the low hundreds.

Anurag 2010-04-07 00:29:48

Answer 3

+1 A:

 /**
  * Gathers the descendant text nodes of a node, depth-first in
  * document order.
  *
  * @param {Node} hoo  Node whose descendants are searched; a falsy value
  *     yields an empty array.
  * @param {Function} [fun]  Optional callback invoked with each text
  *     node. Its return value is collected unless it is null or
  *     undefined. When omitted (or not a function), the text nodes
  *     themselves are collected.
  * @returns {Array} The collected nodes or callback results.
  */
 document.deepText= function(hoo, fun){
        var found= [];
        var useCallback= typeof fun== 'function';

        // Walks the children of `parent`, descending into every node that
        // is not a text node (nodeType 3). nextSibling is read only after
        // the current node has been handled.
        function gather(parent){
            for(var node= parent.firstChild; node!= null; node= node.nextSibling){
                if(node.nodeType!= 3){
                    gather(node);
                    continue;
                }
                if(!useCallback){
                    found[found.length]= node;
                    continue;
                }
                var mapped= fun(node);
                if(mapped!= undefined) found[found.length]= mapped;
            }
        }

        if(hoo) gather(hoo);
        return found;
    };

/* You can return an array of all the descendant text nodes of some parent element, or you can pass it a function and do something (find, replace, or whatever) to the text in place.

This example returns the text of the non-whitespace textnodes in the body:

var A= document.deepText(document.body, function(t){
    var tem= t.data;
    return /\S/.test(tem)? tem: undefined;
});
alert(A.join('\n'))

*/ Handy for search and replace, highlighting and so on

kennebec 2010-04-05 17:59:44

ansaurus

tags:

views:

answers:

getElementsByTagName() equivalent for textNodes

related questions