Beware: regular expressions cannot correctly parse HTML.
Use a parser instead.
#! /usr/bin/perl
use warnings;
use strict;
use HTML::Parser;
# Require exactly one command-line argument: the HTML file to filter.
if (@ARGV != 1) {
    die "Usage: $0 html-file\n";
}
# Escape a (decoded) attribute value so it can be written back inside a
# double-quoted HTML attribute.
sub _escape_attr {
    my ($value) = @_;
    $value = '' unless defined $value;
    $value =~ s/&/&amp;/g;    # must run first so later entities are not re-escaped
    $value =~ s/"/&quot;/g;
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    return $value;
}

# Start-tag handler: prints the tag to the currently selected output handle
# with any "width" and "height" declarations removed from its inline style.
#
# Arguments (in the order requested by the handler's argspec):
#   $tag     - tag name
#   $attr    - hash ref of attribute name => value
#   $attrseq - array ref of attribute names in source order
#   $text    - the original source text of the tag
#   $skipped - source text skipped since the previous reported event
#
# The skipped text is always printed first.  If the tag has no style
# attribute, or its style contains no width/height declaration, the original
# tag text is printed untouched.  Otherwise the tag is rebuilt from its
# attributes: remaining declarations keep their source order, and the style
# attribute is dropped entirely when nothing is left in it.
#
# Limitation: declarations are separated on ";" textually, so a value that
# itself contains ";" (e.g. a data: URL) will be split incorrectly.
sub start {
    my ($tag, $attr, $attrseq, $text, $skipped) = @_;

    print $skipped if defined $skipped;

    unless (defined $attr->{style}) {
        print $text;
        return;
    }

    my @kept;
    my $removed = 0;
    for my $decl (split /;/, $attr->{style}) {
        $decl =~ s/\A\s+//;
        $decl =~ s/\s+\z//;
        next if $decl eq '';

        # Limit of 2 keeps any further ":" (e.g. in url(http://...)) in the value.
        my ($prop, $value) = split /\s*:\s*/, $decl, 2;

        if (defined $value && lc($prop) =~ /\A(?:width|height)\z/) {
            $removed++;
            next;
        }

        # Pieces without a ":" are not understood; pass them through as-is
        # rather than silently deleting content.
        push @kept, defined $value ? "$prop: $value" : $decl;
    }

    # Nothing to strip (e.g. only "max-width" or "line-height" was present):
    # keep the author's original markup byte-for-byte.
    unless ($removed) {
        print $text;
        return;
    }

    my @pieces;
    for my $name (@$attrseq) {
        if ($name eq 'style') {
            next unless @kept;    # avoid leaving an empty style=""
            push @pieces, 'style="' . _escape_attr(join '; ', @kept) . '"';
        }
        elsif ($name eq '/') {
            # NOTE(review): per the HTML::Parser docs, without
            # empty_element_tags the "/" of "<img ... />" is reported as an
            # attribute; emit it bare instead of as /="/".
            push @pieces, '/';
        }
        else {
            # Values are written back escaped because HTML::Parser hands
            # them over entity-decoded unless attr_encoded is set.
            push @pieces, $name . '="' . _escape_attr($attr->{$name}) . '"';
        }
    }

    print join(' ', "<$tag", @pieces), '>';
    return;
}
# Build a parser that copies the document to STDOUT.  Start tags go through
# start(); end tags are printed verbatim.  Text, comments and declarations
# have no handler of their own, so they reach the output as the
# "skipped_text" argument of the next reported event.
my $p = HTML::Parser->new(
    api_version     => 3,
    marked_sections => 1,
    start_h         => [ \&start => "tag, attr, attrseq, text, skipped_text" ],
    end_h           => [ sub { print @_ } => "skipped_text, text" ],
    # Without this handler anything after the final tag (trailing text,
    # comments, the last newline) would never be printed.
    end_document_h  => [ sub { print @_ } => "skipped_text" ],
);

# Slurp the whole input in one read; $/ is localized so the change does not
# leak into the rest of the program.
my $html = do { local $/; <> };
$p->parse(defined $html ? $html : '');

# Signal end of input so buffered text is flushed and end_document fires.
$p->eof;