I have an app I was working on to learn more about wxPython (I have primarily been a scripter). I forgot about it, and now I am opening it back up. It's a screen scraper, and I have it working almost the way I want; I am going to build a regex parser to strip out the links in every scrape that I don't need. My questions are these. In its current state, if I check more than one site, it goes out and scrapes each one and returns the results in separate windows (see the `for each` loop in the Clicked function). I want to put them all together in a frame, in a single window. I also want to know if I can take the list they are read into and send it to a checklist, so someone could check off separate items; I want to build a save function and keep certain ones. Regarding the save function, I want to keep the saved checks: are there calls on the widgets to save their states? I know it's a lot, but thanks for the help.
EDIT (forgot code)
import wx
import urllib2
from BeautifulSoup import BeautifulSoup
from BeautifulSoup import Tag
import re
from pyparsing import makeHTMLTags, originalTextFor, SkipTo, Combine
import wx
import wx.html
# NOTE: a ``global`` statement at module scope has no effect (module-level
# names are already global); it is kept only so the file's top level is
# unchanged.
global P

# Maps the index of each entry in the site check-list (see ``allSites`` in
# ``citPanel``) to the URL that is fetched for it.  Every URL must carry an
# explicit scheme, otherwise ``urllib2.urlopen`` rejects it with
# "unknown url type".
siteDict = {
    0: 'http://www.reddit.com',
    1: 'http://www.boston.com',
    2: 'http://www.stumbleupon.com',
    3: 'http://news.google.com',
}
class citPanel(wx.Panel):
    """Panel showing a prompt and a check-list of the sites to visit.

    The check-list is exposed as ``self.sitList`` so that the owning frame
    can query which entries are ticked.
    """

    def __init__(self, parent, id):
        """Create the panel inside *parent* with window identifier *id*."""
        wx.Panel.__init__(self, parent, id)

        # Prompt shown above the list of sites.
        wx.StaticText(
            self,
            id=-1,
            label="Choose the Sites you would like Charlie to Visit:",
            pos=(45, 15)
        )

        # One check box per site.
        siteNames = ['Reddit', 'Boston.com', 'StumbleUpon', 'Google News']
        self.sitList = wx.CheckListBox(
            self,
            id=20,
            pos=(60, 50),
            size=wx.DefaultSize,
            choices=siteNames
        )
class nextButton(wx.Button):
    """Plain ``wx.Button`` created from a parent, id, label and position."""

    def __init__(self, parent, id, label, pos):
        """Forward all four arguments unchanged to ``wx.Button``."""
        wx.Button.__init__(self, parent, id=id, label=label, pos=pos)
class checkList(wx.Frame):
    """Main window: pick sites in a check-list, then scrape their links.

    Pressing the 'Ok' button fetches every checked site, extracts its
    ``<a>`` tags and shows them in one ``MyHtmlFrame`` per site.
    """

    def __init__(self, parent, id, title):
        """Build the 400x300 frame with the site panel and the 'Ok' button.

        *parent*, *id* and *title* are passed straight to ``wx.Frame``.
        """
        wx.Frame.__init__(self, parent, id, title, size=(400, 300))
        self.panel = citPanel(self, -1)
        nextButton(self.panel, 30, 'Ok', (275, 50))
        # Bound on the frame, not on the button itself: the click on the
        # 'Ok' button (a child of the panel) is handled here.
        self.Bind(wx.EVT_BUTTON, self.Clicked)
        self.Centre()
        self.Show(True)

    def Clicked(self, event):
        """Scrape each checked site and open a window listing its links.

        A site that cannot be fetched is reported on stdout and skipped so
        that the remaining checked sites are still processed.
        """
        siteBox = self.panel.sitList
        checkedItems = [
            i for i in range(siteBox.GetCount()) if siteBox.IsChecked(i)
        ]
        print(checkedItems)
        urls = [siteDict[k] for k in checkedItems]
        print(urls)

        for url in urls:
            try:
                html = self._fetch(url)
            except (IOError, ValueError) as err:
                # urllib2.URLError derives from IOError; urllib2 raises
                # ValueError for a URL without a scheme.
                print('Could not fetch %s: %s' % (url, err))
                continue

            P = self._linksPage(html)
            print(P)
            frm = MyHtmlFrame(None, "Charlie", P)
            frm.Show()

    def _fetch(self, url):
        """Return the raw body of *url*, always closing the connection."""
        site = urllib2.urlopen(url)
        try:
            return site.read()
        finally:
            site.close()

    def _linksPage(self, html):
        """Return prettified HTML holding only the links found in *html*.

        Every ``<a>`` tag is followed by an empty ``<p>`` tag so the links
        are visually separated when rendered.
        """
        soup = BeautifulSoup(html)
        tags = soup.findAll('a')
        soup1 = BeautifulSoup(''.join(str(t) for t in tags))
        for link in soup1.findAll('a'):
            br = Tag(soup1, 'p')
            siblings = link.parent.contents
            # BeautifulSoup 3 tags compare equal when name, attributes and
            # contents match, so ``list.index`` would return the first of
            # several identical links.  Locate this exact node by identity.
            index = next(
                i for i, node in enumerate(siblings) if node is link
            )
            link.parent.insert(index + 1, br)
        return soup1.prettify()
class MyHtmlFrame(wx.Frame):
    """Top-level frame whose content is an HTML rendering of a page."""

    def __init__(self, parent, title, page):
        """Create the frame and load the HTML string *page* into it.

        *parent* and *title* are passed to ``wx.Frame``; the frame id is -1.
        """
        wx.Frame.__init__(self, parent, id=-1, title=title)
        viewer = wx.html.HtmlWindow(self)

        # Standard fonts are only requested on the gtk2 port.
        onGtk2 = "gtk2" in wx.PlatformInfo
        if onGtk2:
            viewer.SetStandardFonts()

        viewer.SetPage(page)
#app = wx.PySimpleApp()
#app.MainLoop()
#event.Skip()
#self.Destroy()
# Start the GUI: create the application object, open the main window
# (its constructor shows it) and hand control to the wx event loop.
app = wx.App()
checkList(parent=None, id=-1, title='Charlie')
app.MainLoop()