Update:
I have outlined some basic performance tests for each of these six methods, running each one 1000 times. getElementsByTagName is the fastest, but it only does part of the job: instead of selecting all elements, it selects only one particular type of tag (p, I think) and blindly assumes that each element's firstChild is a text node. It is a little flawed, but it's there for demonstration purposes and for comparing its performance to TreeWalker. Run the tests yourself on jsfiddle to see the results.
- Using a TreeWalker
- Custom Iterative Traversal
- Custom Recursive Traversal
- Xpath query
- querySelectorAll
- getElementsByTagName
Let's assume for a moment that there is a method that allows you to get all Text nodes natively. You would still have to traverse each resulting text node and call node.nodeValue to get the actual text, as you would with any DOM node. So the performance issue is not with iterating through the text nodes, but with iterating through all the nodes that are not text and checking their type. I would argue (based on the results) that TreeWalker performs just as fast as getElementsByTagName, if not faster (even though the getElementsByTagName version is handicapped in its favour by doing less work).
Ran each test 1000 times.
Method Total ms Average ms
--------------------------------------------------
document.TreeWalker 816 0.816
Iterative Traverser 2518 2.518
Recursive Traverser 21556 21.556
XPath query 5087 5.087
querySelectorAll 5352 5.352
getElementsByTagName 660 0.66
Source for each method:
TreeWalker
/**
 * Benchmark: reads the value of every text node under document.body using
 * the browser's built-in TreeWalker.
 *
 * The values are gathered into a local array that is discarded; the function
 * returns nothing.
 */
function nativeTreeWalker() {
    var values = [];
    // SHOW_TEXT asks the walker to surface text nodes only, so no per-node
    // type check is needed in the loop below.
    var textWalker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null, false);
    // nextNode() yields null once the walk is exhausted, which ends the loop.
    for (var current = textWalker.nextNode(); current; current = textWalker.nextNode()) {
        values.push(current.nodeValue);
    }
}
Recursive Tree Traversal
/**
 * Benchmark: reads the value of every text node under document.body with a
 * hand-written depth-first recursion over childNodes.
 *
 * The values are gathered into a local array that is discarded; the function
 * returns nothing.
 */
function customRecursiveTreeWalker() {
    var collected = [];
    var TEXT_NODE = 3; // value compared against nodeType to detect text nodes

    // Visits the children of `parent` in order: text nodes are recorded,
    // every other node is descended into.
    function visit(parent) {
        for (var idx = 0; idx < parent.childNodes.length; idx += 1) {
            var kid = parent.childNodes[idx];
            if (kid.nodeType == TEXT_NODE) {
                collected.push(kid.nodeValue);
                continue;
            }
            visit(kid);
        }
    }

    visit(document.body);
}
Iterative Tree Traversal
/**
 * Benchmark: reads the value of every text node under document.body with a
 * hand-written, non-recursive depth-first traversal that follows
 * firstChild / nextSibling / parentNode links.
 *
 * The values are gathered into a local array that is discarded; the function
 * returns nothing.
 */
function customIterativeTreeWalker() {
    var result = [];
    var root = document.body;
    var node = root.childNodes[0];
    while (node != null) {
        // Comparison, not assignment: only text nodes (nodeType 3) are collected.
        if (node.nodeType === 3) {
            result.push(node.nodeValue);
        }
        if (node.hasChildNodes()) {
            node = node.firstChild;
        }
        else {
            // Climb until some ancestor has a next sibling, never going above root.
            while (node !== root && node.nextSibling == null) {
                node = node.parentNode;
            }
            // Back at root means the whole subtree has been visited; stop here
            // instead of stepping onto root's own sibling and leaving the subtree.
            if (node === root) {
                break;
            }
            node = node.nextSibling;
        }
    }
}
querySelectorAll
/**
 * Benchmark: reads the value of every text node under document.body by
 * selecting elements with querySelectorAll and scanning each element's
 * direct children for text nodes.
 *
 * Values are grouped by parent element (in the order querySelectorAll returns
 * the elements), gathered into a local array that is discarded; the function
 * returns nothing.
 */
function nativeSelector() {
    // "body" is listed alongside "body *" so that text placed directly inside
    // <body> is visited as well.
    var elements = document.querySelectorAll("body, body *");
    var results = [];
    for (var i = 0; i < elements.length; i++) {
        var children = elements[i].childNodes;
        // Check every child, not just the first: a text node may follow an
        // element child (e.g. "<p><b>x</b> tail</p>").
        for (var j = 0; j < children.length; j++) {
            if (children[j].nodeType === 3) {
                results.push(children[j].nodeValue);
            }
        }
    }
}
getElementsByTagName (handicap)
/**
 * Benchmark (deliberately reduced): reads the value of the first child of
 * every <p> element, treating that first child as a text node without
 * checking its type.
 *
 * The values are gathered into a local array that is discarded; the function
 * returns nothing.
 */
function getElementsByTagName() {
    var elements = document.getElementsByTagName("p");
    var results = [];
    for (var i = 0; i < elements.length; i++) {
        var first = elements[i].childNodes[0];
        // An empty <p> has no first child; skip it rather than throwing a
        // TypeError on undefined.nodeValue.
        if (first) {
            results.push(first.nodeValue);
        }
    }
}
XPath
/**
 * Benchmark: reads the value of every text node under document.body using an
 * XPath query evaluated through document.evaluate.
 *
 * The values are gathered into a local array that is discarded; the function
 * returns nothing.
 */
function xpathSelector() {
    // Evaluate relative to <body> so the query covers the same subtree as the
    // other benchmark functions (no text from <head>).
    var xpathResult = document.evaluate(
        ".//text()",
        document.body,
        null,
        XPathResult.ORDERED_NODE_ITERATOR_TYPE,
        null
    );
    var results = [];
    var res;
    // iterateNext() returns null when the iterator is exhausted.
    while ((res = xpathResult.iterateNext())) {
        // Collect the text itself, like the other methods, not the node object.
        results.push(res.nodeValue);
    }
}
Also, you might find this discussion helpful - http://bytes.com/topic/javascript/answers/153239-how-do-i-get-elements-text-node