tags:

views:

176

answers:

1

Hey everyone I don't know whether I am doing the right thing here, basically I want both of my class to be json-serializable.

import json

class gpagelet(json.JSONEncoder):
    """
    Holds   1) the pagelet xpath, which is a string
            2) the list of pagelet shingles, list
    """
    def __init__(self, parent):
        if not isinstance( parent, gwebpage):
            raise Exception("Parent must be an instance of gwebpage")
        self.parent = parent    # This must be a gwebpage instance
        self.xpath = None       # This is just an id for the pagelet (not unique across page), historically called xpath
        self.visibleShingles = []
        self.invisibleShingles = []
        self.urls = []


    def __str__(self):
        """String representation of this object"""
        ret = ""
        ret += "xpath: %s\n"  % self.xpath

        def appendShingles():
            ret += "shingles: \n"
            for each in self.shingles:
                ret += "%s\n" % str(each)

        ret += "urls:\n"
        for each in self.urls:
            ret += "%s\n" % str( each)
        return ret





class gwebpage(json.JSONEncoder):
    """
    Holds all the datastructure after the results have been parsed
    holds:  1) lists of gpagelets
            2) loc, string, location of the file that represents it
    """
    def __init__(self, url):
        self.url = url              # This will be http://
        self.netloc = False         # This will be http:// too
        self.gpagelets = []
        # Appended by functions
        self.page_key = ""


    def __str__(self):
        ret = ""
        ret += "url: %s\n" % self.url
        ret += "netloc: %s\n" % self.netloc
        ret += "page_key: %s\n" % self.page_key
        ret += "pagelets:\n"
        for each in self.gpagelets:
            ret += "%s\n" % each.__str__()
        return ret


class GpageletEncoder( json.JSONEncoder):

    def default(self, gp):
        gwebpageEncoder = GwebpageEncoder()
        if not isinstance( gp, gpagelet):
            raise Exception( "Cannot use GpageletEncoder on a non gpagelet instance")
        u = { }
        u['parent'] = gwebpageEncoder.default( gp.parent)
        u['xpath'] = gp.xpath
        u['visibleShingles'] = gp.visibleShingles
        u['invisibleShingles'] = gp.invisibleShingles
        u['urls'] = gp.urls
        return u




class GwebpageEncoder( json.JSONEncoder):


    def default(self, gw):
        gpageletEncoder = GpageletEncoder()
        if not isinstance( gw, gwebpage):
            raise Exception( "Cannot use gwebpageEncoder on a non gwebpage instance")
        u = { }
        u['url'] = gw.url
        u['netloc'] = gw.netloc
        u['gpagelets'] = [ gpageletEncoder.default( each) for each in gw.gpagelets ]
        u['page_key'] = gw.page_key
        return u





if __name__ == "__main__":

    import simplejson
    mom = gwebpage('http://www.google.com')
    son = gpagelet( mom)
    mom.gpagelets.append( son)
    print simplejson.dumps( mom, cls=GwebpageEncoder)

One of the trouble is that 1) I don't know what default is suppose to do 2) I don't know whether GWebpage's default is suppose to return the default or encoded gwebpage

Now I am getting infinite recursion.

Can someone help?

A: 

FYI I am seeing a lot of trouble with this: u['gpagelets'] = [ gpageletEncoder.default( each) for each in gw.gpagelets ]