ansaurus

Question

In JavaScript, how can I replace text in an HTML page without affecting the tags?

Answer 1

+3 A:

Don't use regex to parse HTML. [X][HT]ML is not a regular language and cannot reliably be processed using regex. Your browser has a good HTML parser built-in; let that take the strain of working out where the tags are.

Also, you don't really want to operate on html()/innerHTML of the body. Doing so serialises and re-parses the entire page, which is slow and loses any information that cannot be serialised in HTML, such as event handlers, form values and other JavaScript references.

Here's a method using DOM that seems to work for me:

/**
 * Recursively walk the subtree under `element` and hand every text node
 * to replaceInText().
 *
 * Children of <script> and <style> elements are left untouched. Nodes that
 * are neither elements nor text nodes are ignored.
 *
 * @param {Node} element - node whose descendants are processed.
 * @param {RegExp} find - pattern passed through to replaceInText().
 * @param {function(Array): Node} replace - callback passed through to
 *     replaceInText().
 */
function replaceInElement(element, find, replace) {
    var ELEMENT_NODE = 1;
    var TEXT_NODE = 3;

    // Walk backwards: a replacement can add siblings to the live child
    // list, and those only ever appear after the index still to be visited.
    var index = element.childNodes.length;
    while (index > 0) {
        index -= 1;
        var node = element.childNodes[index];

        if (node.nodeType === TEXT_NODE) {
            replaceInText(node, find, replace);
            continue;
        }
        if (node.nodeType !== ELEMENT_NODE) {
            continue;
        }

        // Special case: the content of these elements is not page text.
        var name = node.nodeName.toLowerCase();
        if (name === 'script' || name === 'style') {
            continue;
        }
        replaceInElement(node, find, replace);
    }
}
/**
 * Replace every match of `find` inside a single text node with the node
 * returned by `replace(match)`.
 *
 * The text node is split around each match and the middle piece is swapped
 * for the replacement node, so surrounding text stays in place.
 *
 * @param {Text} text - text node to process; must have a parentNode when
 *     at least one match is found.
 * @param {RegExp} find - global (/g) regular expression. Its lastIndex is
 *     reset to 0 before scanning.
 * @param {function(Array): Node} replace - called once per match with the
 *     exec() result array; returns the node to insert in place of the match.
 * @throws {TypeError} if `find` is not a global regexp (exec() would keep
 *     returning the same first match forever).
 */
function replaceInText(text, find, replace) {
    if (!find.global) {
        throw new TypeError('replaceInText: find must be a global (/g) RegExp');
    }

    var matches = [];
    var match;

    // A regexp that was used before may carry a non-zero lastIndex, which
    // would make exec() skip matches near the start of the text.
    find.lastIndex = 0;
    while ((match = find.exec(text.data)) !== null) {
        matches.push(match);
        // exec() does not move lastIndex past an empty match; step over it
        // manually so the loop always makes progress.
        if (match[0].length === 0) {
            find.lastIndex++;
        }
    }

    // Apply the replacements last-to-first so the match indexes recorded
    // above stay valid for the (shrinking) head node `text`.
    for (var i = matches.length - 1; i >= 0; i--) {
        match = matches[i];
        text.splitText(match.index);
        text.nextSibling.splitText(match[0].length);
        text.parentNode.replaceChild(replace(match), text.nextSibling);
    }
}

// Keywords to match. This *must* be a 'g'lobal regexp: replaceInText()
// repeatedly calls find.exec() to collect every match in a text node.
var find= /\b(keyword|whatever)\b/gi;

// Replace each matched string with a link to the Wikipedia article of the
// same name; the link text is the matched string itself.
replaceInElement(document.body, find, function(match) {
    var link= document.createElement('a');
    // Percent-encode the matched text so that characters such as '?', '#'
    // or spaces cannot alter the URL if the keyword list is changed. Plain
    // word characters are left as they are.
    link.href= 'http://en.wikipedia.org/wiki/'+encodeURIComponent(match[0]);
    link.appendChild(document.createTextNode(match[0]));
    return link;
});

bobince 2009-09-18 14:23:41

`i-->0` Clever. I've never seen that before.

Patrick McElhaney 2009-09-18 14:40:08

I can't claim credit for that, it's an idiom for reverse-iteration in C-like languages! :-)

bobince 2009-09-18 14:47:20

I usually use just `i--`, as in: `for (var i=100; i--; )`

kangax 2009-09-18 16:18:02

Yep, that'll work too for a lower bound of 0. The explicit `>0` is also a defensive measure for cases where `i` might start off negative: with a plain `i--` test, a negative `i` never becomes falsy, so the loop would run endlessly.

bobince 2009-09-18 17:39:26

What I liked about `i-->0` is that I first read it as i→0, or "i approaches zero."

Patrick McElhaney 2009-09-18 18:31:28

ansaurus

tags:

views:

answers:

In JavaScript, how can I replace text in an HTML page without affecting the tags?

related questions