I am trying to get a value out of a HTML page using the python HTMLParser library. The value I want to get hold of is within this html element:
...
<div id="remository">20</div>
...
This is my HTMLParser class so far:
class LinksParser(HTMLParser.HTMLParser):
def __init__(self):
HTMLParser.HTMLParser.__init__(self)
self.seen = {}
def handle_starttag(self, tag, attributes):
if tag != 'div': return
for name, value in attributes:
if name == 'id' and value == 'remository':
#print value
return
def handle_data(self, data):
print data
p = LinksParser()
f = urllib.urlopen("http://domain.com/somepage.html")
html = f.read()
p.feed(html)
p.close()
Can someone point me in the right direction? I want the class functionality to get the value 20.