OK, I am getting the error in the subject line from the deferred.py file. I realize this error is caused by something not loading, but does anyone have any thoughts? This script is being called from a URL on the production server. I don't see why the BulkPageParser class shows up in the error.

import time
import random
import string
import cPickle
from StringIO import StringIO
try:
    import json
except ImportError:
    import simplejson as json 
import urllib
import pprint
import datetime
import sys
sys.path.append("C:\Program Files (x86)\Google\google_appengine")
sys.path.append("C:\Program Files (x86)\Google\google_appengine\lib\yaml\lib")
sys.path.append("C:\Program Files (x86)\Google\google_appengine\lib\webob")
from google.appengine.api import users
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
from google.appengine.ext import db
class SR(db.Model):
    name = db.StringProperty()
    title = db.StringProperty()
    url = db.StringProperty()

## request the url and return the parsed JSON data
def overview(page):
    u = urllib.urlopen(page)
    buf = StringIO(u.read())  ## buffer the response body so the socket can be closed
    u.close()
    try:
        JSON_data = json.load(buf)
        return JSON_data
    except ValueError, e:
        print e, "Couldn't get .json for %s" % page
        return None

class Todo(db.Model):
    page = db.StringProperty()

class BulkPageParser(bulkupdate.BulkUpdater):
    def get_query(self):
        return Todo.all()

    def handle_entity(self, entity):
        JSON_data = overview(entity.page)
        ## as before, parse_json returns a list of SR objects and a list of
        ## new links/pages to visit
        data_gathered, new_links = parse_json(JSON_data, [])
        db.put(data_gathered)
        for link in new_links:
            Todo(page=link).put()
        entity.delete()
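
## parse_json isn't shown in this paste. A minimal sketch of what it might
## look like, assuming the JSON holds a list of records plus outbound links
## (the 'data' key and field names here are hypothetical):
def parse_json(JSON_data, new_links):
    data_gathered = []
    if JSON_data is None:  ## overview() returns None on a failed fetch
        return data_gathered, new_links
    for item in JSON_data.get('data', []):
        data_gathered.append(SR(name=item.get('name'),
                                title=item.get('title'),
                                url=item.get('url')))
        if item.get('url'):
            new_links.append(item['url'])
    return data_gathered, new_links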


pagei = 'startingurl.com'
Todo(page=pagei).put()
job = BulkPageParser()
job.start()
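
I'm also wondering whether the module-level kickoff above is part of the problem: deferred has to re-import this module to run the task, and anything at module level (including job.start()) runs again on every import. Here is a sketch of moving the kickoff into a request handler so it only runs when the URL is hit (untested; the '/start' route is a placeholder):

class KickoffHandler(webapp.RequestHandler):
    def get(self):
        ## seed the work queue and enqueue the bulkupdate job
        pagei = 'startingurl.com'
        Todo(page=pagei).put()
        job = BulkPageParser()
        job.start()
        self.response.out.write('job started')

application = webapp.WSGIApplication([('/start', KickoffHandler)], debug=True)

def main():
    run_wsgi_app(application)

if __name__ == '__main__':
    main()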