Ok, forgive me for how large this is. I thought this was a very interesting question, but while playing with it, I quickly realized that innerHTML and its ilk are quite unreliable with respect to maintaining whitespace, comments, etc. With that in mind, I fell back to actually pulling down a full copy of the source so that I could be absolutely sure I got the full source. I then used jQuery and a few (relatively small) regexes to find the location of each node. It seems to work well, although I'm sure I've missed some edge cases. And, yeah, yeah, regexes and two problems, blah blah blah.
Edit: As an exercise in building jQuery plugins, I've modified my code to function reasonably well as a standalone plugin with an example similar to the HTML found below (which I will leave here for posterity). I've tried to make the code slightly more robust (such as now handling tags inside quoted strings, such as onclick), but the biggest remaining bug is that it can't account for any modifications to the page, such as appending elements. I would probably need to use an iframe instead of an ajax call to handle that case.
<html>
<head id="node0">
<!-- first comment -->
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js"></script>
<style id="node1">
/* Page styles. The id makes this element one of the nodeN targets that the inline script looks up. */
/* Disabled rule, kept as found (presumably a debugging aid; confirm before removing): */
/* div { border: 1px solid black; } */
/* Draw a frame around every pre element, including the three source output boxes. */
pre { border: 1px solid black; }
</style>
<!-- second comment -->
<script>
$(function () {
  // Numeric suffixes of the element ids to locate: node1 through node10.
  var NODE_NUMBERS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];

  // Start of a region whose contents must never be scanned for tags: a script
  // element, an xmp element, or an HTML comment. Group 1 is the region name.
  // The lookahead stops longer tag names that merely begin with "script" or
  // "xmp" from being treated as openers.
  var OPAQUE_OPENER = /<(!--|(?:script|xmp)(?![\w:-]))/i;

  // How each kind of opaque region ends, keyed by the lower-cased region name.
  var OPAQUE_CLOSER = {
    xmp: /<\/\s*xmp\s*>/i,
    script: /<\/\s*script\s*>/i,
    '!--': /-->/
  };

  /**
   * Blank out everything inside script elements, xmp elements and HTML
   * comments so that tag-like text in code or comments is not counted as
   * real markup. The opener and closer of each region are kept, and the
   * result has exactly one entry per input line so line numbers still match.
   *
   * @param {string[]} lines Raw source, one entry per line.
   * @returns {string[]} Sanitized source, same length as `lines`.
   */
  function sanitizeLines(lines) {
    var cleaned = [];
    var openRegion = null; // name of the region we are inside; persists across lines

    for (var i = 0; i < lines.length; i++) {
      var rest = lines[i];
      var kept = '';

      for (;;) {
        if (openRegion === null) {
          var opener = OPAQUE_OPENER.exec(rest);
          if (!opener) {
            kept += rest; // ordinary markup: keep verbatim
            break;
          }
          openRegion = opener[1].toLowerCase();
          // Keep the text before the region plus a bare opener; attributes
          // are dropped along with the region's contents.
          kept += rest.slice(0, opener.index) + '<' + openRegion +
            (openRegion === '!--' ? '' : '>');
          rest = rest.slice(opener.index + opener[0].length);
        }

        var closer = OPAQUE_CLOSER[openRegion].exec(rest);
        if (!closer) {
          break; // region continues on the next line; discard the remainder
        }
        kept += closer[0];
        rest = rest.slice(closer.index + closer[0].length);
        openRegion = null;
      }

      cleaned.push(kept);
    }
    return cleaned;
  }

  /**
   * Find the zero-based source line holding the opening tag of `node`.
   * The element's position among all elements with the same tag name in the
   * DOM is matched against the running count of that tag's openers in the
   * sanitized source.
   *
   * @param {jQuery} node jQuery object wrapping the element to locate.
   * @param {string[]} cleanLines Output of sanitizeLines.
   * @returns {?number} Line index, or null when the element or its tag
   *     cannot be found (jQuery's map discards null results).
   */
  function findSourceLine(node, cleanLines) {
    var element = node[0];
    if (!element) {
      return null;
    }
    var tag = element.tagName;
    var position = $(tag).index(element) + 1;
    // The lookahead keeps a short tag name from matching a longer one that
    // starts with the same letters.
    var opener = new RegExp('<' + tag + '(?![\\w:-])', 'gi');
    var seen = 0;

    for (var i = 0; i < cleanLines.length; i++) {
      var hits = cleanLines[i].match(opener);
      if (hits) {
        seen += hits.length;
        if (seen >= position) {
          if (window.console && console.debug) {
            console.debug(node, tag, position, seen, i);
          }
          return i;
        }
      }
    }
    return null;
  }

  /**
   * Fill #source_pretty with one span per source line (id line_number_N),
   * each prefixed with its line number in bold. Source text is inserted as
   * text nodes, so markup in the fetched page is shown rather than parsed
   * and executed.
   *
   * @param {string[]} lines Raw source, one entry per line.
   */
  function renderPrettySource(lines) {
    var target = $('#source_pretty').empty();
    $.each(lines, function (i, line) {
      var row = document.createElement('span');
      var label = document.createElement('strong');
      row.id = 'line_number_' + i;
      label.appendChild(document.createTextNode(i + ':'));
      row.appendChild(label);
      row.appendChild(document.createTextNode(' ' + line));
      if (i > 0) {
        target.append(document.createTextNode('\n'));
      }
      target.append(row);
    });
  }

  // Fetch this page's own source as text and display it.
  $.ajax({
    url: location.href,
    type: 'get',
    dataType: 'text',
    success: function (data) {
      var lines = data.split(/\r?\n/);
      var clean_source = sanitizeLines(lines);

      // Locate every wanted node before touching the DOM: the lookup counts
      // live elements, and the rendered output adds new ones.
      var line_numbers = $.map(NODE_NUMBERS, function (num) {
        return findSourceLine($('#node' + num), clean_source);
      });

      renderPrettySource(lines);
      $('#source_raw').text(data);
      $('#source_clean').text(clean_source.join('\n'));
      $.each(line_numbers, function (_, lineNumber) {
        $('#line_number_' + lineNumber).css('background-color', 'orange');
      });
    },
    error: function (xhr, status) {
      // Report the failure instead of leaving the page silently empty.
      $('#source_raw').text('Unable to fetch page source: ' + status);
    }
  });

  // Deliberately unused: tag-like text inside the script that the sanitizer
  // must ignore when counting tags.
  var false_matches = [
    "<div>",
    "<div>",
    "</div>",
    "</div>"
  ].join('');
});
</script>
</head>
<!-- third comment -->
<body id="node2">
<!-- Output area: the inline script in the head fills these three pre elements with the numbered, raw and sanitized page source. -->
<div>
<pre id="source_pretty">
</pre>
<pre id="source_raw">
</pre>
<pre id="source_clean">
</pre>
</div>
<!-- Fixture: xmp content is literal text to the browser, so the tag-like text inside it creates no elements (presumably here to provoke false hits; confirm). -->
<div id="node3">
<xmp>
<code>
// <xmp> is deprecated, you should put it in <code> instead
</code>
</xmp>
</div>
<!-- fourth comment -->
<!-- Fixture: nested lookup targets packed onto three lines, so several tags with the same name share one source line. -->
<div><div><div><div><div><div><span><div id="node4"><span><span><b><em>
<i><strong><pre></pre></strong></i><div><div id="node5"><div></div></div></div></em>
</b></span><span><span id="node6"></span></span></span></div></span></div></div></div></div></div></div>
<!-- Fixture: similar nesting written with one tag per line. -->
<div>
<div>
<div id="node7">
<div>
<div>
<div id="node8">
<span>
<!-- fifth comment -->
<div>
<span>
<span>
<b>
<em id="node9">
<i>
<strong>
<pre>
</pre>
</strong>
</i>
<div>
<div>
<div>
</div>
</div>
</div>
</em>
</b>
</span>
<span>
<span id="node10">
</span>
</span>
</span>
</div>
</span>
</div>
</div>
</div>
</div>
</div>
</div>
</body>
</html>