I don't know how efficient it is, but you could always xor the two strings you are matching, and find the index of the first mismatch.
#! /usr/bin/env perl
use strict;
use warnings;
use 5.10.1;
my $str_source = "ATTCCGGG";
my $str1 = "ATTGCGGG";
my $str2 = "ATACCGGC";
my $str3 = "GTTCCGGG";
# this returns the index of all of the mismatches (zero based)
# it returns an empty list if the two strings match.
sub diff_index{
my($a,$b) = @_;
my $cmp = $a^$b;
my @cmp;
while( $cmp =~ /[^\0]/g ){ # match non-zero byte
push @cmp, pos($cmp) - 1;
}
return @cmp;
}
for my $str ( $str_source, $str1, $str2, $str3 ){
say '# "', $str, '"';
my @ret = diff_index $str_source, $str;
if( @ret ){
say '[ ', join( ', ', @ret), ' ]';
}else{
say '# match';
}
}
# "ATTCCGGG"
# match
# "ATTGCGGG"
[ 3 ]
# "ATACCGGC"
[ 2, 7 ]
# "GTTCCGGG"
[ 0 ]
Running it through B::Concise shows that the CPU expensive operations, happen as single opcodes. Which means that those operations are run in C.
perl -MO=Concise,-exec,-compact,-src,diff_index test.pl |
perl -pE's/^[^#].*? \K([^\s]+)$/# $1/' # To fix highlighting bugs
main::diff_index:
# 15: my($a,$b) = @_;
1 <;> nextstate(main 53 test.pl:15) # v:%,*,&,$
2 <0> pushmark # s
3 <$> gv(*_) # s
4 <1> rv2av[t3] # lK/3
5 <0> pushmark # sRM*/128
6 <0> padsv[$a:53,58] # lRM*/LVINTRO
7 <0> padsv[$b:53,58] # lRM*/LVINTRO
8 <2> aassign[t4] # vKS
# 16: my $cmp = $a^$b;
9 <;> nextstate(main 54 test.pl:16) # v:%,*,&,$
a <0> padsv[$a:53,58] # s
b <0> padsv[$b:53,58] # s
c <2> bit_xor[t6] # sK <----- Single OP -----
d <0> padsv[$cmp:54,58] # sRM*/LVINTRO
e <2> sassign # vKS/2
# 18: my @cmp;
f <;> nextstate(main 55 test.pl:18) # v:%,*,&,{,$
g <0> padav[@cmp:55,58] # vM/LVINTRO
# 20: while( $cmp =~ /[^\0]/g ){ # match non-zero byte
h <;> nextstate(main 57 test.pl:20) # v:%,*,&,{,$
i <{> enterloop(next->r last->v redo->j) # v
s <0> padsv[$cmp:54,58] # s
t </> match(/"[^\\0]"/) # sKS/RTIME <----- Single OP -----
u <|> and(other->j) # vK/1
# 21: push @cmp, pos($cmp) - 1;
j <;> nextstate(main 56 test.pl:21) # v:%,*,&,$
k <0> pushmark # s
l <0> padav[@cmp:55,58] # lRM
m <0> padsv[$cmp:54,58] # sRM
n <1> pos[t8] # sK/1
o <$> const(IV 1) # s
p <2> subtract[t9] # sK/2
q <@> push[t10] # vK/2
r <0> unstack # v
goto # s
v <2> leaveloop # vK/2
# 24: return @cmp;
w <;> nextstate(main 58 test.pl:24) # v:%,*,&,{,$
x <0> pushmark # s
y <0> padav[@cmp:55,58]
z <@> return # K
10 <1> leavesub[1 ref] # K/REFC,1