The trick is to basically generate a probe function that will check if a given name is the name of a nested (first-level) function. The probe function uses the function body of the original function, prefixed with code to check the given name within the scope of the probe function. OK, this can be better explained with the actual code:
/**
 * Splits the source text of a classic `function` declaration/expression
 * into its name and its body.
 *
 * The argument is converted to a string first, so either a function object
 * or a string holding function source may be passed.
 *
 * Limitations: the parameter list is matched up to the first `)`, so
 * parameter lists that themselves contain a `)` (for example inside a
 * comment) are not recognised. Arrow functions and method shorthand are
 * not recognised either.
 *
 * @param {Function|string} fn Function (or function source) to split.
 * @returns {{name: string, body: string}} `name` is the empty string for
 *     anonymous functions; `body` is the text between the first `{` after
 *     the parameter list and the last `}` of the source.
 * @throws {string} The string "Invalid function." when the input does not
 *     look like a `function` declaration/expression. A string (rather than
 *     an Error) is kept so existing callers that compare the thrown value
 *     keep working.
 */
function splitFunction(fn) {
    // Group 1: function name (possibly empty). Group 2: function body.
    // - `[^)]*` accepts whitespace inside the parameter list, so
    //   `function f(a, b) {}` is recognised.
    // - `[\s\S]*` is greedy and backtracks to the last `}` that is followed
    //   only by whitespace, which is the closing brace of the function.
    var tokens =
        /^\s*function\s*([^(\s]*?)\s*\([^)]*\)\s*\{([\s\S]*)\}\s*$/
            .exec(String(fn));
    if (!tokens) {
        throw "Invalid function.";
    }
    return {
        name: tokens[1],
        body: tokens[2]
    };
}
// Probe: reports whether the identifier passed as the first argument
// resolves to a function in the scope where this code is evaluated.
// It builds the expression `typeof <name> === "function"` and runs it
// through a direct eval().
//
// The name is read through arguments[0] instead of a named parameter
// because extractFunctions() copies this function's body text in front of
// another function's body and compiles the result with a parameterless
// `new Function(...)`. The body must therefore work when pasted verbatim,
// and it must not introduce locals that could shadow the names being probed.
//
// eval() may throw for tokens that are not valid expressions; the caller
// in extractFunctions() catches that and ignores the token.
var probeOutside = function () {
return eval(
"typeof $fn$ === \"function\""
.split("$fn$")
.join(arguments[0]));
};
/**
 * Returns the names of the functions declared at the first nesting level
 * of `fn`, found by probing rather than by parsing.
 *
 * A "probe" function is compiled from the body of probeOutside followed by
 * the body of `fn`. The probe returns before any of `fn`'s statements run,
 * but `fn`'s first-level function declarations are hoisted into the probe's
 * scope, so the probe can test whether a given name is a function there.
 * A name is reported when it is a function inside the probe but not a
 * function as seen by probeOutside.
 *
 * Known limitation: an inner function that has the same name as a function
 * visible to probeOutside is excluded.
 *
 * NOTE: this relies on `new Function` and `eval`; only use it on trusted
 * function source.
 *
 * @param {Function|string} fn Function whose first-level functions are listed.
 * @returns {string[]} Unique function names in order of first appearance.
 * @throws {string} "Invalid function." (from splitFunction) when `fn` cannot
 *     be split into name and body.
 */
function extractFunctions(fn) {
    var fnParts = splitFunction(fn);
    var probeInside = new Function(
        splitFunction(probeOutside).body + fnParts.body);
    var fns = [];
    // Tokens already probed. Keys are prefixed so that names such as
    // "__proto__" or "constructor" cannot collide with object internals.
    var seen = {};
    // Identifier-shaped tokens; `$` is allowed so names like `$helper`
    // are probed as a whole.
    var tokenRe = /[A-Za-z_$][\w$]*/g;
    var match;
    while ((match = tokenRe.exec(fnParts.body))) {
        var token = match[0];
        var key = "#" + token;
        if (Object.prototype.hasOwnProperty.call(seen, key)) {
            // Probe each distinct token once; this also prevents a name
            // from being reported once per occurrence in the body.
            continue;
        }
        seen[key] = true;
        try {
            if (probeInside(token) && !probeOutside(token)) {
                fns.push(token);
            }
        } catch (e) {
            // Deliberate best effort: tokens that are not valid
            // expressions (e.g. reserved words) make eval throw; skip them.
        }
    }
    return fns;
}
Runs fine against the following on Firefox, IE, Safari, Opera and Chrome:
// Demo fixture for extractFunctions().
// testGlobalFn is declared outside testSuite. extractFunctions() drops any
// name that probeOutside() also reports as a function, so the mention of
// testGlobalFn inside testSuite must not show up in the result.
function testGlobalFn() {}
// testSuite declares two first-level functions, testA and testB. The nested
// function, the commented-out declarations, the comments and line breaks
// inside testB's declaration, and the declaration inside the string literal
// are decoys that must not affect the result.
function testSuite() {
function testA() {
function testNested() {
}
}
// function testComment() {}
// function testGlobalFn() {}
function // comments
testB /* don't matter */
() // neither does whitespace
{
var s = "function testString() {}";
}
}
// The returned array is converted to a comma-separated string on output.
document.write(extractFunctions(testSuite));
// writes "testA,testB"
Edit by Christoph, with inline answers by Ates:
Some comments, questions and suggestions:
Is there a reason for checking
typeof $fn$ !== "undefined" && $fn$ instanceof Function
instead of using the following?
typeof $fn$ === "function"
`instanceof` is less safe than `typeof` because it will fail when passing objects between frame boundaries. I know that IE returns wrong `typeof` information for some built-in functions, but as far as I know `instanceof` will fail in these cases as well, so why use the more complicated but less safe test?
[AG] There was absolutely no legitimate reason for it. I've changed it to the simpler "typeof === function" as you suggested.
How are you going to prevent the wrongful exclusion of functions for which a function with the same name exists in the outer scope, e.g.
// Outer foo: a function named foo is visible outside TestSuite.
function foo() {}
function TestSuite() {
// Inner foo has the same name as the outer one. The probing check
// (probeInside(token) && !probeOutside(token)) finds a function named foo
// in both scopes, so this inner function is left out of the result.
function foo() {}
}
[AG] I have no idea. Can you think of anything? Which one do you think is better: (a) wrongful exclusion of a function inside, or (b) wrongful inclusion of a function outside?
I started to think that the ideal solution will be a combination of your solution and this probing approach; figure out the real function names that are inside the closure and then use probing to collect references to the actual functions (so that they can be directly called from outside).
- It might be possible to modify your implementation so that the function's body only has to be eval()'ed once rather than once per token, which is rather inefficient. I might try to see what I can come up with when I have some more free time today...
[AG] Note that the entire function body is not eval'd. It's only the bit that's inserted to the top of the body.
[CG] You're right - the function's body only gets parsed once, during the creation of probeInside - you did some nice hacking there ;). I have some free time today, so let's see what I can come up with...
A solution that uses your parsing method to extract the real function names could just use one eval to return an array of references to the actual functions:
return eval("[" + fnList + "]");
[CG] Here is what I came up with. An added bonus is that the outer function stays intact and thus may still act as a closure around the inner functions. Just copy the code into a blank page and see if it works - no guarantees that it is bug-free ;)
<pre><script>
/**
 * extractFunctions(func) -> Array of Function
 *
 * Returns references to the functions declared at the first nesting level
 * of `func`. The source of `func` is tokenized to find the names, then the
 * body of `func` is re-compiled with `new Function` and run once as a
 * constructor so the inner functions can be collected from its scope.
 *
 * NOTE: this executes the body of `func` and compiles source text with
 * `new Function`; only use it on trusted functions.
 *
 * Known limitations of the tokenizer: regular-expression literals that
 * contain unescaped quotes or comment markers, and parameter lists that
 * contain `)`, are not understood. If the body of `func` returns before
 * its end, the references are not collected.
 */
var extractFunctions = (function() {
    // Scanner state shared with the extractTopLevelFunctionNames callback.
    // Both are reset by getTopLevelFunctionRefs before every scan.
    var level, names;

    /**
     * Splits source text into delimiter tokens (keywords, brackets, quotes,
     * comment markers, whitespace, backslashes) and the text runs between
     * them. Joining the tokens reproduces `code` exactly; nothing is removed,
     * so the re-compiled code keeps its string and regex escapes intact.
     */
    function tokenize(code) {
        var regex = /\bfunction\b|\(|\)|\{|\}|\/\*|\*\/|\/\/|"|'|\n|\s+|\\/g,
            tokens = [],
            pos = 0;

        for(var matches; (matches = regex.exec(code)); pos = regex.lastIndex) {
            var match = matches[0],
                matchStart = regex.lastIndex - match.length;

            // Text between the previous delimiter and this one.
            if(pos < matchStart)
                tokens.push(code.substring(pos, matchStart));

            tokens.push(match);
        }

        // Trailing text after the last delimiter.
        if(pos < code.length)
            tokens.push(code.substring(pos));

        return tokens;
    }

    /**
     * Feeds every token to `callback(token, tokens, index)`.
     * The callback may return `false` to stop, or a number to continue
     * after that index; any other return value continues with the next token.
     */
    function parse(tokens, callback) {
        for(var i = 0; i < tokens.length; ++i) {
            var j = callback(tokens[i], tokens, i);
            if(j === false) break;
            else if(typeof j === 'number') i = j;
        }
    }

    /**
     * Returns the index of the next `limiter` token after `idx` (or
     * tokens.length when there is none). With `escapes` set, the token
     * following a backslash is skipped so an escaped limiter does not end
     * the scan.
     */
    function skip(tokens, idx, limiter, escapes) {
        while(++idx < tokens.length && tokens[idx] !== limiter)
            if(escapes && tokens[idx] === '\\') ++idx;
        return idx;
    }

    /**
     * parse() callback: removes everything up to and including the first
     * `)` found outside comments, i.e. `function name(params)`, so that the
     * token list starts just before the opening brace of the body.
     */
    function removeDeclaration(token, tokens, idx) {
        switch(token) {
            case '/*':
            return skip(tokens, idx, '*/');

            case '//':
            return skip(tokens, idx, '\n');

            case ')':
            tokens.splice(0, idx + 1);
            return false;
        }
    }

    /**
     * parse() callback: tracks brace depth in `level` and records in `names`
     * the name of every `function` found at depth 1 (directly inside the
     * outer function's body). Comments and string literals are skipped.
     */
    function extractTopLevelFunctionNames(token, tokens, idx) {
        switch(token) {
            case '{':
            ++level;
            return;

            case '}':
            --level;
            return;

            case '/*':
            return skip(tokens, idx, '*/');

            case '//':
            return skip(tokens, idx, '\n');

            case '"':
            case '\'':
            return skip(tokens, idx, token, true);

            case '\\':
            // A backslash outside strings and comments escapes what follows
            // (e.g. `\{` or `\/` inside a regex literal); skip that token so
            // it is not mistaken for a brace, quote or comment marker.
            return idx + 1;

            case 'function':
            if(level === 1) {
                // Look for the name, ignoring whitespace and comments that
                // may sit between `function` and the name.
                while(++idx < tokens.length) {
                    token = tokens[idx];

                    // Reached the parameter list first: anonymous function.
                    if(token === '(')
                        return idx;

                    if(/^\s+$/.test(token))
                        continue;

                    if(token === '/*') {
                        idx = skip(tokens, idx, '*/');
                        continue;
                    }

                    if(token === '//') {
                        idx = skip(tokens, idx, '\n');
                        continue;
                    }

                    names.push(token);
                    return idx;
                }
            }
            return;
        }
    }

    /**
     * Entry point (exported as extractFunctions).
     * @param {Function} func Function whose first-level functions are wanted.
     * @returns {Function[]} References to those functions, in source order.
     */
    function getTopLevelFunctionRefs(func) {
        var tokens = tokenize(func.toString());
        parse(tokens, removeDeclaration);

        names = [], level = 0;
        parse(tokens, extractTopLevelFunctionNames);

        // Body of `func` followed by a statement that publishes the inner
        // functions on `this`; running it as a constructor hands them back.
        var code = tokens.join('') + '\nthis._refs = [' +
            names.join(',') + '];';

        return (new (new Function(code)))._refs;
    }

    return getTopLevelFunctionRefs;
})();
// Demo fixture for the tokenizer-based extractFunctions().
// testSuite declares two first-level functions, testA and testB. The nested
// function, the commented-out declarations, the comments and line breaks
// inside testB's declaration, and the declaration inside the string literal
// are decoys that must not be picked up.
function testSuite() {
function testA() {
function testNested() {
}
}
// function testComment() {}
// function testGlobalFn() {}
function // comments
testB /* don't matter */
() // neither does whitespace
{
var s = "function testString() {}";
}
}
// extractFunctions() returns function references here; join() converts each
// one to its source text, separated by a "---" line.
document.writeln(extractFunctions(testSuite).join('\n---\n'));
</script></pre>
Not as elegant as Lisp macros, but it's still nice to see what JavaScript is capable of ;)