



Using Python, is there a good tutorial on downloading data from websites in both XML and CSV formats?

I am trying to get information from financial websites that require authorization. I have an ID and password.

Any thoughts, TIA

I found the code below, but it does not work. Any help in fixing it? I'm looking to get stock/option prices.

# Module metadata: release version, release date, author contact and project URL.
# The version attribute uses the conventional double-underscore name.
__version__ = "0.3"
__date__ = "2008-05-09"
__author__ = "Denis Laprise - [email protected]"
__url__ = ""

import urllib2, urllib, re

class CookieJar:
    """Minimal in-memory cookie store, keyed by cookie name.

    Only the ``name=value`` pair of each cookie is kept; attributes such as
    path or expiry that follow the first ``;`` are discarded.
    """

    def __init__(self):
        # Maps cookie name -> cookie value.
        self._cookies = {}

    def extractCookies(self, response, nameFilter=None):
        """Store the cookies carried by the ``Set-Cookie`` headers of *response*.

        Args:
            response: an object exposing ``headers.getheaders(name)``, as the
                responses returned by urllib2 do.
            nameFilter: optional container of cookie names. When given and
                non-empty, only cookies whose name is in it are stored.
        """
        for cookie in response.headers.getheaders('Set-Cookie'):
            # A header without "=" yields the whole text as name and "" as value.
            name, _, rest = cookie.partition("=")
            if not nameFilter or name in nameFilter:
                # Drop the cookie attributes that follow the value.
                self._cookies[name] = rest.partition(";")[0]

    def addCookie(self, name, value):
        """Store (or overwrite) the cookie *name* with *value*."""
        self._cookies[name] = value

    def hasCookie(self, name):
        """Return True if a cookie called *name* is stored."""
        return name in self._cookies

    def setCookies(self, request):
        """Attach every stored cookie to *request* as a single ``Cookie`` header.

        Args:
            request: an object exposing ``add_header(key, value)``, such as a
                ``urllib2.Request``.

        Nothing is added when the jar is empty, so no blank header is sent.
        """
        if not self._cookies:
            return
        request.add_header('Cookie',
                           "; ".join(["%s=%s" % (k, v)
                                      for k, v in self._cookies.items()]))

class GHTTPCookieProcessor(urllib2.BaseHandler):
    """urllib2 handler that keeps a CookieJar in sync with the traffic.

    Outgoing requests get the jar's cookies attached; incoming responses have
    their ``Set-Cookie`` headers recorded in the jar. The same processing is
    applied to HTTP and HTTPS.
    """

    def __init__(self, cookieJar):
        # The jar is shared with the caller, so cookies survive across openers.
        self.cookies = cookieJar

    def https_request(self, request):
        """Attach the stored cookies to the outgoing *request* and return it."""
        self.cookies.setCookies(request)
        return request

    def https_response(self, request, response):
        """Record the cookies set by *response* and return it unchanged."""
        self.cookies.extractCookies(response)
        return response

    # Plain-HTTP traffic is handled exactly like HTTPS.
    http_request = https_request
    http_response = https_response

class LoginFailure(Exception):
    """Raised by doLogin when no SMSESSION cookie is present after the login request."""

class OrderFailure(Exception):
    """Raised by placeOrder when no SMSESSION cookie is present after the order request."""

class InvalidMarketError(Exception):
    """Signals an unsupported market.

    NOTE(review): not raised anywhere in the visible code; presumably meant
    for _validateMarket implementations -- confirm.
    """

class InvalidOrderType(Exception):
    """Raised by placeOrder when the order type is neither "1" (buy) nor "2" (sell)."""

class InvalidPriceType(Exception):
    """Raised by placeOrder when the price type is neither "1" (market) nor "2" (limit)."""

class Session:
    """An abstract class to represent an E*Trade session.

    Handles login and the subsequent cookie dance. Subclasses must implement
    doLogin, _getOptionUrl and _validateMarket.
    """

    def __init__(self, username, password):
        """Remember the credentials and start with an empty cookie jar.

        Args:
            username: account user name sent at login.
            password: account password sent at login.
        """
        self._cookies = CookieJar()
        # Use the caller's credentials rather than hard-coded placeholders.
        self._username = username
        self._password = password

    def doLogin(self):
        """A login method which can be used to log in to E*Trade.

        Raises:
            NotImplementedError: always; subclasses provide the real login.
        """
        raise NotImplementedError

    def getPage(self, url, post_data=None):
        """Get the URL *url* with cookies enabled, posting *post_data* if given.

        Args:
            url: address to fetch.
            post_data: a dict (URL-encoded before sending), an already
                encoded string, or None for a plain GET.

        Returns:
            The response object returned by the urllib2 opener.
        """
        opener = urllib2.build_opener(GHTTPCookieProcessor(self._cookies))
        opener.addheaders = [('User-agent', "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)")]
        if isinstance(post_data, dict):
            post = urllib.urlencode(post_data)
        else:
            post = post_data
        f = opener.open(self._encode(url), data=post)
        # Record any cookies from the final response; this is a no-op when
        # the response carries no Set-Cookie header.
        self._cookies.extractCookies(f)
        return f

    def _encode(self, value): # This method is copyright (C) 2004, Adrian Holovaty
        """Helper method: convert unicode *value* to UTF-8 bytes.

        Allows non-ASCII characters; non-unicode values are returned unchanged.
        """
        if isinstance(value, unicode):
            value = value.encode("utf-8")
        return value

    def _getOptionUrl(self):
        """Subclass hook; the base implementation always raises NotImplementedError."""
        raise NotImplementedError

    def _validateMarket(self, market):
        """Subclass hook; the base implementation always raises NotImplementedError."""
        raise NotImplementedError

class USASession(Session):
    """E*Trade session for the US site: login and order placement."""

    # NOTE(review): both endpoint URLs are empty in this copy of the script and
    # must be filled in before doLogin / placeOrder can reach the server.
    LOGIN_URL = ""
    ORDER_URL = ""

    def __init__(self, username, password):
        """Create the session and set up the market and column tables.

        Args:
            username: account user name sent at login.
            password: account password sent at login.
        """
        Session.__init__(self, username, password)
        # Neither table is read by the methods visible in this class.
        self._markets = {'U.S.' : 'A', 'CDN' : 'C'}
        self._columns = ['SYMBOL', 'STRIKE', 'BID', 'ASK', 'LAST', 'VALUE', 'OVER', 'DELTA', 'GAMMA', 'THETA', 'VEGA', 'VOLUME', 'OPENS']

    def doLogin(self):
        """Post the credentials to the login URL.

        Returns:
            This session, so calls can be chained.

        Raises:
            LoginFailure: if no SMSESSION cookie is stored after the request.
        """
        params = {'countrylangselect' : "us_english", 'USER' : self._username, 'PASSWORD' : self._password, 'TARGET' : "/e/t/invest/socreateentry"}
        self.getPage(self.LOGIN_URL, params)
        if not self._cookies.hasCookie('SMSESSION'):
            raise LoginFailure
        return self

    def placeOrder(self, orderType, numberShares, stockSymbol, priceType, price):
        """Post a stock order to the order URL.

        Args:
            orderType: "1" to buy, "2" to sell.
            numberShares: quantity to trade.
            stockSymbol: ticker symbol.
            priceType: "1" for a market order, "2" for a limit order.
            price: limit price sent with the order.

        Returns:
            This session, so calls can be chained.

        Raises:
            InvalidOrderType: if orderType is not "1" or "2".
            InvalidPriceType: if priceType is not "1" or "2".
            OrderFailure: if no SMSESSION cookie is stored after the request.
        """
        # 1 Buy 2 Sell
        if orderType not in ("1", "2"):
            raise InvalidOrderType
        # 1 Market 2 Limit
        if priceType not in ("1", "2"):
            raise InvalidPriceType
        params = {'ordertype' : orderType, 'symbol' : stockSymbol, 'quantity' : numberShares, 'pricetype' : priceType, 'limitprice' : price}
        # Debug output: show the encoded form and where the request ended up.
        post = urllib.urlencode(params)
        print("post is")
        print(post)
        p = self.getPage(self.ORDER_URL, params)
        print("The page real url is")
        print(p.geturl())
        if not self._cookies.hasCookie('SMSESSION'):
            raise OrderFailure
        return self

For logging in and browsing, use mechanize, and for extracting data, use BeautifulSoup.

BeautifulSoup is too complicated.
+2  A: 

What you describe is a relatively small task in Python, and I would guess there are not many tutorials about it.

Basically, to retrieve a document (no matter if XML or CSV) from a website you can use urllib2:

import contextlib
import urllib2

url = ""  # address of the XML or CSV document to download
# closing() releases the connection even if read() raises.
with contextlib.closing(urllib2.urlopen(url)) as response:
    data = response.read()

To work with XML you could use ElementTree:

from xml.etree import ElementTree

# Parse the downloaded XML text; fromstring returns the root element.
rootelem = ElementTree.fromstring(data)

Now you can inspect the XML tree with the ElementTree API. See the documentation for further information.

To work with CSV you could use the csv module:

import csv

# csv.reader expects an iterable of lines, not one big string in a list.
# Line endings are kept so quoted fields spanning several lines survive.
csvreader = csv.reader(data.splitlines(True))

You can read the values in a simple for loop. Again see the documentation.

If this doesn't answer your question, please describe in more detail what you would like to achieve.

Your altruism is legendary. Well done.