class gpagelet:
    """
    A single pagelet attached to a gwebpage.

    Holds: 1) parent, the gwebpage instance passed to the constructor
           2) xpath, the pagelet xpath string (None on construction)
           3) visibleShingles / invisibleShingles, lists of shingle tuples
           4) urls, a list of url strings
    """

    def __init__(self, parent):
        # Accept only a gwebpage as parent; anything else is an error.
        if isinstance(parent, gwebpage):
            self.parent = parent
        else:
            raise Exception("Parent must be an instance of gwebpage")

        # The remaining fields start empty.
        self.xpath = None
        self.visibleShingles = list()
        self.invisibleShingles = list()
        self.urls = list()
class gwebpage:
    """
    Container for the data structures built after results are parsed.

    Holds: 1) url, the value passed to the constructor
           2) netloc, starts as False (annotated as a string in the
              original comments)
           3) gpagelets, a list meant to hold gpagelet instances
           4) page_key, a string, initially empty
    """

    def __init__(self, url):
        # Only url comes from the caller; every other field gets a
        # fixed initial value.
        self.url = url
        self.netloc = False
        self.gpagelets = list()
        self.page_key = ""
Is there a way for me to make my classes JSON serializable? What worries me is the recursive reference: each gpagelet points to its parent gwebpage, which in turn holds a list of gpagelets.
Write your own encoder and decoder. They can be very simple, e.g. an encoder that just returns the object's __dict__.
For example, here is an encoder that dumps a fully recursive tree structure; you can enhance it or use it as is for your own purposes.
import json
class Tree(object):
    """A named tree node holding a list of child Tree nodes."""

    def __init__(self, name, childTrees=None):
        """
        name       -- the node's name
        childTrees -- list of child nodes; when omitted (None) the node
                      gets its own new empty list
        """
        self.name = name
        # None is the default so that a new list is created per instance.
        self.childTrees = [] if childTrees is None else childTrees
class MyEncoder(json.JSONEncoder):
    """JSON encoder that serializes Tree instances via their __dict__."""

    def default(self, obj):
        """
        Return the attribute dict for a Tree; defer every other type to
        the base JSONEncoder.default.
        """
        if isinstance(obj, Tree):
            return obj.__dict__
        return super(MyEncoder, self).default(obj)
# Build a small tree: root "t" with two childless children.
c1 = Tree("c1")
c2 = Tree("c2")
t = Tree("t", [c1, c2])
# Parenthesized single-argument print behaves the same as the Python 2
# print statement and is also valid as the Python 3 print() function.
print(json.dumps(t, cls=MyEncoder))
It prints:
{"childTrees": [{"childTrees": [], "name": "c1"}, {"childTrees": [], "name": "c2"}], "name": "t"}
You can similarly write a decoder, but there you will need some way to identify whether the data represents your object or not, so maybe you can store a type field too if needed.