I would get rid of Regex and look at Beautiful Soup.
findAll(True)
lists all the tags found in your source.
from BeautifulSoup import BeautifulSoup
soup = BeautifulSoup(source)
allTags = soup.findAll(True)
[tag.name for tag in allTags ]
[u'em', u'label']
then you just need to remove possible duplicates and confront your tags lists.
This snippet verifies that ALL of source's tags are present in target's tags.
from BeautifulSoup import BeautifulSoup
def get_tags_set(source):
soup = BeautifulSoup(source)
all_tags = soup.findAll(True)
return set([tag.name for tag in all_tags])
def verify(tags_source_orig, tags_source_to_verify):
return tags_source_orig == set.intersection(tags_source_orig, tags_source_to_verify)
source= '<label>What\'s your name</label><label>What\'s your name</label><em>Hello</em>'
source_to_verify= '<em>Hello</em><label>What\'s your name</label><label>What\'s your name</label>'
print verify(get_tags_set(source),get_tags_set(source_to_verify))