You shouldn't be using a regex for this in the first place. You are trying to do too much with one regex (see "Can you provide some examples of why it is hard to parse XML and HTML with a regex?" for why). What you need is an HTML parser. See "Can you provide an example of parsing HTML with your favorite parser?" for examples using a variety of parsers.
Take a look at HTML::Parser. Here is a (probably incomplete) implementation:
#!/usr/bin/perl
use strict;
use warnings;
use HTML::Parser;
{
    # Class name => replacement class name. Classes that are not listed
    # here are passed through unchanged. The hash is private to this block.
    my %map = (
        foo => "f",
        bar => "b",
    );

    # Escape the characters that would break a double-quoted HTML attribute
    # value. '&' is handled first so the entities produced by the later
    # substitutions are not escaped a second time.
    sub _escape_attr_value {
        my ($value) = @_;
        $value =~ s/&/&amp;/g;
        $value =~ s/"/&quot;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        return $value;
    }

    # start($tag, $attr)
    #
    # Print the start tag <$tag ...> built from the attribute hash ref
    # $attr (attribute name => value).
    #
    # * The whitespace-separated names in a "class" attribute are each
    #   replaced by their %map entry when one exists.
    # * Attributes are emitted in sorted name order so the output is
    #   deterministic (plain hash order is not).
    # * Values are re-escaped before printing: HTML::Parser's "attr"
    #   argspec hands them over with entities already decoded, so printing
    #   them verbatim could emit a bare '"' or '&'.
    #
    # The caller's hash is left unmodified.
    sub start {
        my ($tag, $attr) = @_;
        my $attr_string = '';
        for my $key (sort keys %$attr) {
            my $value = $attr->{$key};
            if ($key eq 'class') {
                # exists (rather than ||) keeps working when a replacement
                # is a false value such as "0", and runs on perls older
                # than 5.10 that lack the // operator.
                $value = join " ",
                    map { exists $map{$_} ? $map{$_} : $_ }
                    split " ", $value;
            }
            $attr_string .= qq/ $key="/ . _escape_attr_value($value) . q/"/;
        }
        print "<$tag$attr_string>";
    }
}
# text($chunk)
#
# Text handler: writes the chunk it receives to the currently selected
# output handle without altering it.
sub text {
    my ($chunk) = @_;
    print $chunk;
}
# end($tag)
#
# End-tag handler: prints the closing tag for the element name $tag.
sub end {
    my ($name) = @_;
    print '</' . $name . '>';
}
# Build a parser that re-emits the document on the selected output handle,
# rewriting only start tags (see start()).
#
# * text_h uses the "text" argspec, i.e. the original source text. With
#   "dtext" entities would be decoded and then printed raw, turning for
#   example "&lt;" in the input into a literal "<" in the output.
# * default_h catches every event that has no handler of its own
#   (comments, declarations such as a doctype, processing instructions)
#   and passes its source text through, so those are no longer dropped.
my $p = HTML::Parser->new(
    start_h   => [ \&start, "tagname,attr" ],
    text_h    => [ \&text,  "text" ],
    end_h     => [ \&end,   "tagname" ],
    default_h => [ \&text,  "text" ],
);

# parse_file returns the parser object on success and undef when the
# input could not be opened.
$p->parse_file(\*DATA)
    or die "parse_file failed: $!";
__DATA__
<html>
<head>
<title>foo</title>
</head>
<body>
<span class="foo">Foo!</span> <span class="bar">Bar!</span>
<span class="foo bar">Foo Bar!</span>
This should not be touched: class="foo"
</body>
</html>