Firstly, you should use arrays for this rather than regex, because array and dictionary lookups will be faster. A regex engine is orders of magnitude more complex, and thus too heavy for a job this simple. As Atwood says: a programmer thinks he can solve a problem with a regex; then he has two problems.
Below is a quick implementation that does the job: it uses your list of garbage strings, handles punctuation, and exploits the speed of JavaScript's built-in dictionaries (plain objects) to check whether a word is garbage or not. There's a little test page you can try it out on.
/**
 * Breaks a string into its words.
 *
 * The text is first passed through unpunctuate(), then cut at every single
 * space character. Consecutive spaces therefore yield empty-string entries.
 *
 * @param {string} str - Text to split.
 * @returns {string[]} The space-separated pieces of the cleaned text.
 */
function splitwords(str) {
    return unpunctuate(str).split(" ");
}
/**
 * Strips punctuation characters from a string.
 *
 * Every occurrence of '.', ',', ';', ':' and '-' is removed; all other
 * characters (including whitespace) are left untouched.
 *
 * @param {string} str - Text to clean.
 * @returns {string} The text with those punctuation characters deleted.
 */
function unpunctuate(str) {
    var punctuation = ['.', ',', ';', ':', '-'];
    var unpunctuated = str;
    for (var i = 0; i < punctuation.length; i++) {
        var punct = punctuation[i];
        // split/join removes EVERY occurrence; String.prototype.replace with a
        // string pattern would only remove the first one.
        // To keep punctuation as separate tokens instead of removing it,
        // join with " " + punct + " " rather than "".
        unpunctuated = unpunctuated.split(punct).join("");
    }
    return unpunctuated;
}
// Words that should be stripped from input text.
var garbageStrings = ['of', 'the', "in", "on", "at", "to", "a", "is"];
// Lookup table keyed by garbage word (value 1) for constant-time membership
// checks. It is created without a prototype so that inherited names such as
// "constructor" or "toString" never register as garbage via the `in` operator.
var garbagedict = Object.create(null);
for (var garbidx = 0; garbidx < garbageStrings.length; garbidx++) {
    garbagedict[garbageStrings[garbidx]] = 1;
}
/**
 * Removes every garbage word from a string.
 *
 * The text is tokenised with splitwords(), each token that is a key of
 * garbagedict is dropped, and the remaining tokens are joined back together
 * with single spaces. Matching is exact and case-sensitive.
 *
 * @param {string} str - Text to filter.
 * @returns {string} The remaining words joined by single spaces.
 */
function remove(str) {
    var words = splitwords(str);
    var keeps = [];
    for (var i = 0; i < words.length; i++) {
        var word = words[i];
        // Own-property check: a bare `in` test also matches names inherited
        // from Object.prototype (e.g. "constructor", "toString"), which would
        // wrongly discard those words.
        if (!Object.prototype.hasOwnProperty.call(garbagedict, word)) {
            keeps.push(word);
        }
    }
    return keeps.join(" ");
}