Hey everyone I don't know whether I am doing the right thing here, basically I want both of my class to be json-serializable.
import json
class gpagelet(json.JSONEncoder):
Holds 1) the pagelet xpath, which is a string
2) the list of pagelet shingles, list
def __init__(self, parent):
if not isinstance( parent, gwebpage):
raise Exception("Parent must be an instance of gwebpage")
self.parent = parent # This must be a gwebpage instance
self.xpath = None # This is just an id for the pagelet (not unique across page), historically called xpath
self.visibleShingles = []
self.invisibleShingles = []
self.urls = []
def __str__(self):
"""String representation of this object"""
ret = ""
ret += "xpath: %s\n" % self.xpath
def appendShingles():
ret += "shingles: \n"
for each in self.shingles:
ret += "%s\n" % str(each)
ret += "urls:\n"
for each in self.urls:
ret += "%s\n" % str( each)
return ret
class gwebpage(json.JSONEncoder):
Holds all the datastructure after the results have been parsed
holds: 1) lists of gpagelets
2) loc, string, location of the file that represents it
def __init__(self, url):
self.url = url # This will be http://
self.netloc = False # This will be http:// too
self.gpagelets = []
# Appended by functions
self.page_key = ""
def __str__(self):
ret = ""
ret += "url: %s\n" % self.url
ret += "netloc: %s\n" % self.netloc
ret += "page_key: %s\n" % self.page_key
ret += "pagelets:\n"
for each in self.gpagelets:
ret += "%s\n" % each.__str__()
return ret
class GpageletEncoder( json.JSONEncoder):
def default(self, gp):
gwebpageEncoder = GwebpageEncoder()
if not isinstance( gp, gpagelet):
raise Exception( "Cannot use GpageletEncoder on a non gpagelet instance")
u = { }
u['parent'] = gwebpageEncoder.default( gp.parent)
u['xpath'] = gp.xpath
u['visibleShingles'] = gp.visibleShingles
u['invisibleShingles'] = gp.invisibleShingles
u['urls'] = gp.urls
return u
class GwebpageEncoder( json.JSONEncoder):
def default(self, gw):
gpageletEncoder = GpageletEncoder()
if not isinstance( gw, gwebpage):
raise Exception( "Cannot use gwebpageEncoder on a non gwebpage instance")
u = { }
u['url'] = gw.url
u['netloc'] = gw.netloc
u['gpagelets'] = [ gpageletEncoder.default( each) for each in gw.gpagelets ]
u['page_key'] = gw.page_key
return u
if __name__ == "__main__":
import simplejson
mom = gwebpage('http://www.google.com')
son = gpagelet( mom)
mom.gpagelets.append( son)
print simplejson.dumps( mom, cls=GwebpageEncoder)
One of the trouble is that 1) I don't know what default is suppose to do 2) I don't know whether GWebpage's default is suppose to return the default or encoded gwebpage
Now I am getting infinite recursion.
Can someone help?