tags:

views:

87

answers:

2

Initially, I used this function:

function findIndexesMultiPass(find,str) {
    var x, output = {};

    for (var i = 0; i < find.length; i++) {
        output[find[i]] = [];
        x = 0;
        while ((x = str.indexOf(find[i], x)) > -1) {
            output[find[i]].push(x++);
        }
    }

    return output;
}

var search = ['a','b'];
var testText = "abcd abcd abcd";
var result = findIndexes(search,testText);
    // {'a':[0,5,10], 'b':[1,6,11]}

which looks through testText multiple times for each of the items in the search array and records their indexes.

It would probably be more efficient to only look through my string once. So I rewrote the function as:

function findIndexesOnePass(find,str) {
    var output = {};

    for (var i = 0; i < find.length; i++) {
        output[find[i]] = [];
    }

    for (var i = 0; i < str.length; i++) {
        var currentChar = str.charAt(i);
        if (output[currentChar] !== undefined) {
            output[currentChar].push(i);
        }
    }

    return output;
}

var search = ['a','b'];
var testText = "abcd abcd abcd";
var result = findIndexes(search,testText);
    // {'a':[0,5,10], 'b':[1,6,11]}

But, I'm not sure how efficient it is to check every single character of my string and see if a key was defined in the output object. What's the best way to do this?

EDIT:
I timed my functions and Guffa's regex suggestion and got these numbers:

Google Chrome (Mac)
findIndexesMultiPass: 44ms
findIndexesOnePass: 799ms
findIndexesRegEx: 95ms

Safari
findIndexesMultiPass: 48ms
findIndexesOnePass: 325ms
findIndexesRegEx: 293ms

Firefox
findIndexesMultiPass: 56ms
findIndexesOnePass: 369ms
findIndexesRegEx: 786ms

(http://shiing.com/indexTest.html)

It looks as though the first solution isn't bad at all.

EDIT: In the times posted above, the array size was only 3. I increased it to 26 for another test and got these numbers:

Chrome
findIndexesMultiPass: 534ms
findIndexesOnePass: 399ms
findIndexesRegEx: 1232ms

Safari
findIndexesMultiPass: 597ms
findIndexesOnePass: 582ms
findIndexesRegEx: 7431ms    

Firefox
findIndexesMultiPass: 604ms
findIndexesOnePass: 770ms
findIndexesRegEx: 8329ms

I'll probably only need to search for 3 characters so the first function will work.

A: 

Assuming few letters to search for and many letters to search against (i.e. low number of letter, long strings), the latter is the most efficient, since you only go through the string once, and then test each letter.

The other one goes through the string as many times as there are letters to search for.

Will Hartung
+2  A: 

You should be able to use a regular expression to find all occurances of each character. Something like:

function findIndexes(find, str) {
  var output = {};
  for (var i = 0; i < find.length; i++) {
    var m = [];
    var r = new RegExp('.*?' + find[i], 'g');
    var ofs = -1;
    while ((x = r.exec(str)) != null) {
      ofs += x[0].length;
      m.push(ofs);
    }
    output[find[i]] = m;
  }
  return output;
}

Edit:

Did some changes, and now it works. :) However, as Javascript doesn't have a matches method to get all matches at once, it's not really any improvment over using indexOf... :P

Edit 2:

However, you can use a regular expression to find any of the characters, so you only need to loop the string once instead of once for each character. :)

function findIndexes(find, str) {
  var output = {};
  for (var i = 0; i < find.length; i++) output[find[i]] = [];
  var r = new RegExp('.*?[' + find.join('') + ']', 'g');
  var ofs = -1;
  while ((x = r.exec(str)) != null) {
    ofs += x[0].length;
    output[x[0].substr(x[0].length-1,1)].push(ofs);
  }
  return output;
}
Guffa