views:

2106

answers:

8

In javascript, is there an equivalent of String.indexOf() that takes a regular expression instead of a string for the first first parameter while still allowing a second parameter ?

I need to do something like

str.indexOf(/[abc]/ , i);

and

str.lastIndexOf(/[abc]/ , i);

While String.search() takes a regexp as a parameter it does not allow me to specify a second argument!

Edit:
This turned out to be harder than I originally thought so I wrote a small test function to test all the provided solutions... it assumes regexIndexOf and regexLastIndexOf have been added to the String object.

function test (str) {
 var i = str.length +2;
 while (i--) {
  if (str.indexOf('a',i) != str.regexIndexOf(/a/,i)) 
   alert (['failed regexIndexOf ' , str,i , str.indexOf('a',i) , str.regexIndexOf(/a/,i)]) ;
  if (str.lastIndexOf('a',i) != str.regexLastIndexOf(/a/,i) ) 
   alert (['failed regexLastIndexOf ' , str,i,str.lastIndexOf('a',i) , str.regexLastIndexOf(/a/,i)]) ;
 }
}

and I am testing as follow to make sure that at least for one character regexp, the result is the same as if we used indexOf

//Look for the a among the xes
test('xxx');
test('axx');
test('xax');
test('xxa');
test('axa');
test('xaa');
test('aax');
test('aaa');

A: 

You could use substr.

str.substr(i).match(/[abc]/);
Glomek
From the well-known JavaScript book published by O'Reilly: "substr has not been standardized by ECMAScript and is therefore deprecated." But I like the basic idea behind what you are getting at.
Jason Bunting
That's a non-issue. If you're REALLY concerned about it, use String.substring() instead - you just have to do the math a bit differently. Besides, JavaScript should not be 100% beholden to it's parent language.
Peter Bailey
It's not a non-issue - if you get your code running against an implementation that doesn't implement substr because they want to adhere to the ECMAScript standards, you are going to have problems. Granted, replacing it with substring is not that hard to do, but it is good to be cognizant of this.
Jason Bunting
A: 

The string object's search method accepts a RegExp and returns the character position index of the first match.

Glenn
from the question: While String.search() takes a regexp as a parameter it does not allow me to specify a second argument!
Pat
Just one piece of the puzzle...
Jason Bunting
str.substr(i).search(/re/)
Glenn
+2  A: 

It does not natively, but you certainly can add this functionality

<script type="text/javascript">

String.prototype.regexIndexOf = function( pattern, startIndex )
{
    startIndex = startIndex || 0;
    var searchResult = this.substr( startIndex ).search( pattern );
    return ( -1 === searchResult ) ? -1 : searchResult + startIndex;
}

String.prototype.regexLastIndexOf = function( pattern, startIndex )
{
    startIndex = startIndex === undefined ? this.length : startIndex;
    var searchResult = this.substr( 0, startIndex ).reverse().regexIndexOf( pattern, 0 );
    return ( -1 === searchResult ) ? -1 : this.length - ++searchResult;
}

String.prototype.reverse = function()
{
    return this.split('').reverse().join('');
}

// Indexes 0123456789
var str = 'caabbccdda';

alert( [
        str.regexIndexOf( /[cd]/, 4 )
    ,   str.regexLastIndexOf( /[cd]/, 4 )
    ,   str.regexIndexOf( /[yz]/, 4 )
    ,   str.regexLastIndexOf( /[yz]/, 4 )
    ,   str.lastIndexOf( 'd', 4 )
    ,   str.regexLastIndexOf( /d/, 4 )
    ,   str.lastIndexOf( 'd' )
    ,   str.regexLastIndexOf( /d/ )
    ]
);

</script>

I didn't fully test these methods, but they seem to work so far.

Peter Bailey
I am testing it and it seems to work except when the regexp is not found ...
Pat
Updated to handle those cases
Peter Bailey
everytime i am about to accept this answer i find a new case ! These give different results!alert( [str.lastIndexOf( /[d]/, 4 ), str.regexLastIndexOf( /[d]/, 4 )]);
Pat
well, of course they are - str.lastIndexOf will do type coercion on the pattern - converting it into a string. The string "/[d]/" most certainly is not found in the input, so the -1 returned is actually accurate.
Peter Bailey
yes sure, I meant str.lastIndexOf( 'd', 4 )
Pat
Got it. After reading the spec on String.lastIndexOf() - I just misunderstood how that argument worked. This new version should handle it.
Peter Bailey
Something is still not right, but it is getting to late ... I'll try to get a test case, and maybe fix it in the morning. Sorry for the trouble so far.
Pat
Ya - I see a fatal flaw in my approach for regexLastIndexOf() that MizardX's solution does better. I'll see if I can cobble something together that encapsulates all this
Peter Bailey
I just added the test function to the question ... this fails this test (among others) 'axx'.lastIndexOf('a',1) != 'axx'.regexLastIndexOf(/a/,1)
Pat
+2  A: 

Based on BaileyP's answer. The main difference is that these methods return -1 if the pattern can't be matched.

Edit: Thanks to Jason Bunting's answer I got an idea. Why not modify the .lastIndex property of the regex? Though this will only work for patterns with the global flag (/g).

Edit: Updated to pass the test-cases.

String.prototype.regexIndexOf = function(re, startPos) {
    startPos = startPos || 0;

    if (!re.global) {
        var flags = "g" + (re.multiline?"m":"") + (re.ignoreCase?"i":"");
        re = new RegExp(re.source, flags);
    }

    re.lastIndex = startPos;
    var match = re.exec(this);

    if (match) return match.index;
    else return -1;
}

String.prototype.regexIndexOf = function(re, startPos) {
    startPos = startPos === undefined ? this.length : startPos;

    if (!re.global) {
        var flags = "g" + (re.multiline?"m":"") + (re.ignoreCase?"i":"");
        re = new RegExp(re.source, flags);
    }

    var lastSuccess = -1;
    for (var pos = 0; pos <= startPos; pos++) {
        re.lastIndex = pos;

        var match = re.exec(this);
        if (!match) break;

        pos = match.index;
        if (pos <= startPos) lastSuccess = pos;
    }

    return lastSuccess;
}
MizardX
This seems the most promising so far (after a few sytax fixes) :-)Only failing a few tests on the edge conditions. Things like 'axx'.lastIndexOf('a',0) != 'axx'.regexLastIndexOf(/a/,0) ... I am looking into it to see if I can fix those cases
Pat
A: 

Well, as you are just looking to match the position of a character , regex is possibly overkill.

I presume all you want is, instead of "find first of these this character" , just find first of these characters.

This of course is the simple answer, but does what your question sets out to do, albeit without the regex part ( because you didn't clarify why specifically it had to be a regex )

function mIndexOf( str , chars, offset )
{
   var first  = -1; 
   for( var i = 0; i < chars.length;  i++ )
   {
      var p = str.indexOf( chars[i] , offset ); 
      if( p < first || first === -1 )
      {
           first = p;
      }
   }
   return first; 
}
String.prototype.mIndexOf = function( chars, offset )
{
   return mIndexOf( this, chars, offset ); # I'm really averse to monkey patching.  
};
mIndexOf( "hello world", ['a','o','w'], 0 );
>> 4 
mIndexOf( "hello world", ['a'], 0 );
>> -1 
mIndexOf( "hello world", ['a','o','w'], 4 );
>> 4
mIndexOf( "hello world", ['a','o','w'], 5 );
>> 6
mIndexOf( "hello world", ['a','o','w'], 7 );
>> -1 
mIndexOf( "hello world", ['a','o','w','d'], 7 );
>> 10
mIndexOf( "hello world", ['a','o','w','d'], 10 );
>> 10
mIndexOf( "hello world", ['a','o','w','d'], 11 );
>> -1
Kent Fredric
Just a comment about monkey patching - while I'm aware of its problems - you think polluting the global namespace is better? It's not like symbol conflicts in BOTH cases can't happen, and are basically refactored/repaired in the same way should a problem arise.
Peter Bailey
Well I need to search for \s and in some cases \W and was hoping I didn't have to enumerate all possibilities.
Pat
BaileyP: you can get around this problem without global namespace pollution, ie: see jQuery for example. use that model. one object for project, your stuff goes inside it. Mootools left a bad taste in my mouth.
Kent Fredric
also to be noted i never code like i wrote there. the example was simplified for use-case reasons.
Kent Fredric
+2  A: 

Combining a few of the approaches already mentioned (the indexOf is obviously rather simple), I think these are the functions that will do the trick:

String.prototype.regexIndexOf = function(regex, startpos) {
    var indexOf = this.substring(startpos || 0).search(regex);
    return (indexOf >= 0) ? (indexOf + (startpos || 0)) : indexOf;
}

String.prototype.regexLastIndexOf = function(regex, startpos) {
    regex = (regex.global) ? regex : new RegExp(regex.source, "g" + (regex.ignoreCase ? "i" : "") + (regex.multiLine ? "m" : ""));
    if(typeof (startpos) == "undefined") {
        startpos = this.length;
    } else if(startpos < 0) {
        startpos = 0;
    }
    var stringToWorkWith = this.substring(0, startpos + 1);
    var lastIndexOf = -1;
    var nextStop = 0;
    while((result = regex.exec(stringToWorkWith)) != null) {
        lastIndexOf = result.index;
        regex.lastIndex = ++nextStop;
    }
    return lastIndexOf;
}

Obviously, modifying the built-in String object would send up red flags for most people, but this may be one time when it is not that big of a deal; simply be aware of it.


UPDATE: Edited regexLastIndexOf() so that is seems to mimic lastIndexOf() now. Please let me know if it still fails and under what circumstances.


UPDATE: Passes all tests found on in comments on this page, and my own. Of course, that doesn't mean it's bulletproof. Any feedback appreciated.

Jason Bunting
Your `regexLastIndexOf` will only return the index of the last *non-overlapping* match.
MizardX
Sorry, not a HUGE regex guy - can you give me an example that would make mine fail? I appreciate being able to learn more, but your response doesn't help someone as ignorant as I am. :)
Jason Bunting
Jason I just added some function to test in the question. this is failing (among other tests) the following 'axx'.lastIndexOf('a',2) != 'axx'.regexLastIndexOf(/a/,2)
Pat
Okay, I got it to pass that test and spent more time looking up relevant details.
Jason Bunting
"aaaaa".regexLastIndexOf(/aaa/). It would find the first three a's, then try to match again on the last two a's, which would fail. "aaaaa".lastIndexOf("aaa") finds the last three a's.
MizardX
Ah - gotcha. Well, I am done with this for now - I don't have the time to do anything further. :( It's been fun though.
Jason Bunting
Nevermind, I took another stab at it. :) More feedback appreciated.
Jason Bunting
I finally got time to benchmark the proposed solutions and yours came out on top so I am accepting it for now.
Pat
A: 

After having all the proposed solutions fail my tests one way or the other, (edit: some were updated to pass the tests after I wrote this) I found the mozilla implementation for Array.indexOf and Array.lastIndexOf

I used those to implement my version of String.prototype.regexIndexOf and String.prototype.regexLastIndexOf as follows:

String.prototype.regexIndexOf = function(elt /*, from*/)
  {
 var arr = this.split('');
 var len = arr.length;

 var from = Number(arguments[1]) || 0;
 from = (from < 0) ? Math.ceil(from) : Math.floor(from);
 if (from < 0)
   from += len;

 for (; from < len; from++) {
   if (from in arr && elt.exec(arr[from]) ) 
  return from;
 }
 return -1;
};

String.prototype.regexLastIndexOf = function(elt /*, from*/)
  {
 var arr = this.split('');
 var len = arr.length;

 var from = Number(arguments[1]);
 if (isNaN(from)) {
   from = len - 1;
 } else {
   from = (from < 0) ? Math.ceil(from) : Math.floor(from);
   if (from < 0)
  from += len;
   else if (from >= len)
  from = len - 1;
 }

 for (; from > -1; from--) {
   if (from in arr && elt.exec(arr[from]) )
  return from;
 }
 return -1;
  };

They seem to pass the test functions I provided in the question.

Obviously they only work if the regular expression matches one character but that is enough for my purpose since I will be using it for things like ( [abc] , \s , \W , \D )

I will keep monitoring the question in case someone provides a better/faster/cleaner/more generic implementation that works on any regular expression.

Pat
Wow, that is a long bit of code. Please check my updated answer and provide feedback. Thanks.
Jason Bunting
This implementation aims for absolute compatibility with lastIndexOf in Firefox and the SpiderMonkey JavaScript engine, including in several cases which are arguably edge cases. [...] in real-world applications, you may be able to calculate from with less complicated code if you ignore those cases.
Pat
Form the mozilla page :-) I just took the code ad change two lines leaving all the edge cases. Since a couple of the other answers were updated to pass the tests, I will try benchmarking them and accept the most efficent. When I have time to revisit the issue.
Pat
I updated my solution and appreciate any feedback or things that cause it to fail. I made a change to fix the overlapping problem pointed out by MizardX (hopefully!)
Jason Bunting
A: 

RexExp instances have a lastIndex property already (if they are global) and so what I'm doing is copying the regular expression, modifying it slightly to suit our purposes, exec-ing it on the string and looking at the lastIndex. This will inevitably be faster than looping on the string. (You have enough examples of how to put this onto the string prototype, right?)

function reIndexOf(reIn, str, startIndex) {
    var re = new RegExp(reIn.source, 'g' + (reIn.ignoreCase ? 'i' : '') + (reIn.multiLine ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if(!res) return -1;
    return re.lastIndex - res[0].length;
};

function reLastIndexOf(reIn, str, startIndex) {
    var src = /\$$/.test(reIn.source) && !/\\\$$/.test(reIn.source) ? reIn.source : reIn.source + '(?![\\S\\s]*' + reIn.source + ')';
    var re = new RegExp(src, 'g' + (reIn.ignoreCase ? 'i' : '') + (reIn.multiLine ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if(!res) return -1;
    return re.lastIndex - res[0].length;
};

reIndexOf(/[abc]/, "tommy can eat");  // Returns 6
reIndexOf(/[abc]/, "tommy can eat", 8);  // Returns 11
reLastIndexOf(/[abc]/, "tommy can eat"); // Returns 11

You could also prototype the functions onto the RegExp object:

RegExp.prototype.indexOf = function(str, startIndex) {
    var re = new RegExp(this.source, 'g' + (this.ignoreCase ? 'i' : '') + (this.multiLine ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if(!res) return -1;
    return re.lastIndex - res[0].length;
};

RegExp.prototype.lastIndexOf = function(str, startIndex) {
    var src = /\$$/.test(this.source) && !/\\\$$/.test(this.source) ? this.source : this.source + '(?![\\S\\s]*' + this.source + ')';
    var re = new RegExp(src, 'g' + (this.ignoreCase ? 'i' : '') + (this.multiLine ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if(!res) return -1;
    return re.lastIndex - res[0].length;
};


/[abc]/.indexOf("tommy can eat");  // Returns 6
/[abc]/.indexOf("tommy can eat", 8);  // Returns 11
/[abc]/.lastIndexOf("tommy can eat"); // Returns 11

A quick explanation of how I am modifying the RegExp: For indexOf I just have to ensure that the global flag is set. For lastIndexOf of I am using a negative look-ahead to find the last occurrence unless the RegExp was already matching at the end of the string.

Prestaul