You can't reliably do this kind of thing with regex on the HTML source at all. Work on the DOM nodes instead: the document has already been nicely parsed into a structure for you.
Here's a keyword linker adapted from this question.
/**
 * Find text in descendants of an element, in reverse document order,
 * skipping everything inside <a> elements.
 *
 * Children are walked last-to-first and the matches inside each text node are
 * reported last-to-first, so a callback that splits the text node at a match
 * leaves the offsets of the earlier (not yet reported) matches valid.
 *
 * @param {Node} element - Node whose childNodes are searched recursively.
 * @param {RegExp} pattern - Pattern to search for. It should carry the global
 *     flag; without it only the first match in each text node is reported.
 *     The pattern's lastIndex is reset for every text node.
 * @param {function(Text, Array)} callback - Invoked as
 *     callback(textNode, match) with `this` set to window, where match is the
 *     array returned by pattern.exec (so match.index and match[0] are set).
 */
function findTextExceptInLinks(element, pattern, callback) {
    for (var childi= element.childNodes.length; childi-->0;) {
        var child= element.childNodes[childi];
        if (child.nodeType===1) {
            // Element node: recurse unless it is already a link.
            if (child.tagName.toLowerCase()!=='a')
                findTextExceptInLinks(child, pattern, callback);
        } else if (child.nodeType===3) {
            // Text node: collect every match first, then report them backwards.
            var matches= [];
            var match;
            // A reused global regex may carry a stale lastIndex; start from 0.
            pattern.lastIndex= 0;
            while (match= pattern.exec(child.data)) {
                matches.push(match);
                // Without the global flag exec() would return the same match forever.
                if (!pattern.global)
                    break;
                // A zero-length match does not advance lastIndex; step past it.
                if (match[0]==='')
                    pattern.lastIndex++;
            }
            for (var i= matches.length; i-->0;)
                callback.call(window, child, matches[i]);
        }
    }
}
// Wrap every occurrence of the phrase that is not already inside a link in a new <a> element.
findTextExceptInLinks(document.body, /\bmatching phrase\b/g, function(textNode, found) {
    var matchStart= found.index;
    var matchEnd= matchStart+found[0].length;
    // Cut off the trailing text first so the second split isolates exactly the matched text.
    textNode.splitText(matchEnd);
    var matchedText= textNode.splitText(matchStart);
    var link= document.createElement('a');
    link.href= 'http://www.example.com/myurl';
    // Move the matched text into the link, then put the link right after the leading text.
    link.appendChild(matchedText);
    textNode.parentNode.insertBefore(link, textNode.nextSibling);
});
ETA, in response to the comments: here's a version of the same thing using plain-text matching rather than regex:
/**
 * Find occurrences of a plain substring in the text descendants of an element,
 * in reverse document order, skipping everything inside <a> elements.
 *
 * Within each text node the occurrences are reported last-to-first and do not
 * overlap, so a callback that splits the text node at the reported index
 * leaves the offsets of the earlier occurrences valid.
 *
 * @param {Node} element - Node whose childNodes are searched recursively.
 * @param {string} substring - Text to look for (case-sensitive). An empty
 *     string matches nothing.
 * @param {function(Text, number)} callback - Invoked as
 *     callback(textNode, index) with `this` set to window, where index is the
 *     offset of the occurrence inside textNode.data.
 */
function findPlainTextExceptInLinks(element, substring, callback) {
    // An empty needle "matches" at every position and could never make progress.
    if (substring==='')
        return;
    for (var childi= element.childNodes.length; childi-->0;) {
        var child= element.childNodes[childi];
        if (child.nodeType===1) {
            // Element node: recurse unless it is already a link.
            if (child.tagName.toLowerCase()!=='a')
                findPlainTextExceptInLinks(child, substring, callback);
        } else if (child.nodeType===3) {
            var index= child.data.length;
            while (index>=0) {
                index= child.data.lastIndexOf(substring, index);
                if (index===-1)
                    break;
                callback.call(window, child, index);
                // Continue strictly before this occurrence; do not rely on the
                // callback having truncated the node to make progress.
                index-= substring.length;
            }
        }
    }
}
// Phrase to turn into links wherever it appears outside an existing link.
var substring= 'matching phrase';
findPlainTextExceptInLinks(document.body, substring, function(textNode, start) {
    // Cut off the trailing text first so the second split isolates exactly the matched text.
    textNode.splitText(start+substring.length);
    var matchedText= textNode.splitText(start);
    var link= document.createElement('a');
    link.href= 'http://www.example.com/myurl';
    // Move the matched text into the link, then put the link right after the leading text.
    link.appendChild(matchedText);
    textNode.parentNode.insertBefore(link, textNode.nextSibling);
});