views:

56

answers:

2

Using Python, is there a good tutorial on downloading data from websites in both XML and CSV formats?

I am trying to get information from financial websites that require authorization. I have an ID and password.

Any thoughts, TIA

I found the code below, but it does not work. Any help in fixing it? I'm looking to get stock/option prices.

# Module metadata. The version attribute uses the conventional dunder name
# (__version__), like the other metadata attributes below.
__version__ = "0.3"
__date__ = "2008-05-09"
__author__ = "Denis Laprise - [email protected]"
__url__ = "http://code.google.com/p/pyetrade/"

import urllib2, urllib, re

class CookieJar:
    """
        A minimal cookie store mapping cookie names to values.

        Only the name=value pair of each Set-Cookie header is kept; everything
        after the first ";" (path, domain, expiry, ...) is discarded.
    """

    def __init__(self):
        self._cookies = {}

    def extractCookies(self, response, nameFilter = None):
        """
        Stores the cookies announced by the Set-Cookie headers of response.

        response -- object whose headers attribute offers getheaders() or
                    get_all()
        nameFilter -- optional container of cookie names; when given (and not
                      empty) only cookies with these names are stored
        """
        headers = response.headers
        # urllib2 responses expose getheaders(); email.message.Message style
        # header objects expose get_all(), which returns None when absent.
        if hasattr(headers, 'getheaders'):
            rawCookies = headers.getheaders('Set-Cookie')
        else:
            rawCookies = headers.get_all('Set-Cookie') or []
        for cookie in rawCookies:
            # A header without "=" yields the whole text as name and "" as value.
            name, _, rest = cookie.partition("=")
            if not nameFilter or name in nameFilter:
                self._cookies[name] = rest.split(";")[0]

    def addCookie(self, name, value):
        """
        Stores (or overwrites) the cookie name with value.
        """
        self._cookies[name] = value

    def hasCookie(self, name):
        """
        Returns True when a cookie called name is stored.
        """
        return name in self._cookies

    def setCookies(self, request):
        """
        Adds a Cookie header carrying every stored cookie to request.

        Nothing is added while the jar is empty, so that no empty Cookie
        header is sent.
        """
        if not self._cookies:
            return
        request.add_header('Cookie',
                           "; ".join(["%s=%s" % (k, v)
                                      for k, v in self._cookies.items()]))

class GHTTPCookieProcessor(urllib2.BaseHandler):
    """
        urllib2 handler that keeps a cookie jar in step with the traffic:
        outgoing requests receive the jar's cookies and incoming responses
        have their cookies stored in the jar.
    """
    def __init__(self, cookieJar):
        self.cookies = cookieJar

    def https_request(self, request):
        """
        Lets the jar add its cookies to request, then passes request on.
        """
        self.cookies.setCookies(request)
        return request

    def https_response(self, request, response):
        """
        Lets the jar read the cookies of response, then passes response on.
        """
        self.cookies.extractCookies(response)
        return response

    # Plain HTTP traffic is processed by the very same methods as HTTPS.
    http_request = https_request
    http_response = https_response

class LoginFailure(Exception):
    """Raised by doLogin when no SMSESSION cookie is stored after the login request."""

class OrderFailure(Exception):
    """Raised by placeOrder when no SMSESSION cookie is stored after the order request."""

class InvalidMarketError(Exception):
    """Error type for an unsupported market (presumably meant for _validateMarket -- TODO confirm)."""

class InvalidOrderType(Exception):
    """Raised by placeOrder when orderType is neither "1" (buy) nor "2" (sell)."""

class InvalidPriceType(Exception):
    """Raised by placeOrder when priceType is neither "1" (market) nor "2" (limit)."""

class Session:
    """
        An abstract class to represent an E*Trade session. Handles login and
        the subsequent cookie dance.

        Subclasses have to implement doLogin, _getOptionUrl and
        _validateMarket.
    """
    def __init__(self, username, password):
        """
        Creates a session with an empty cookie jar.

        username -- account name, read by the doLogin of subclasses
        password -- account password, read by the doLogin of subclasses
        """
        self._cookies = CookieJar()
        # Store the caller's credentials. They used to be replaced by
        # hard-coded placeholder values, so the arguments had no effect.
        self._username = username
        self._password = password

    def doLogin(self):
        """
        A login method which can be used to log to E*Trade
        """
        raise NotImplementedError

    def getPage(self, url, post_data=None):
        """
        Gets the url URL with cookies enabled. Posts post_data.

        url -- address to request; unicode values are encoded as UTF-8
        post_data -- a dict (form-encoded before sending), an already
                     encoded string, or None to send no request body
        Returns the response object produced by the opener.
        """
        opener = urllib2.build_opener(GHTTPCookieProcessor(self._cookies))
        opener.addheaders = [('User-agent', "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)")]
        # isinstance also accepts dict subclasses.
        if isinstance(post_data, dict):
            post = urllib.urlencode(post_data)
        else:
            post = post_data
        f = opener.open(self._encode(url), data=post)
        # The cookie processor installed above already reads the cookies of
        # the responses it sees; reading the final response again only
        # rewrites the same values.
        if 'set-cookie' in f.headers:
            self._cookies.extractCookies(f)
        return f

    def _encode(self, value): # This method is copyright (C) 2004, Adrian Holovaty
        """
        Helper method. Converts unicode values to UTF-8 byte strings, in
        order to allow non-ASCII characters; any other value is returned
        unchanged.
        """
        if isinstance(value, unicode):
            value = value.encode("utf-8")
        return value

    def _getOptionUrl(self):
        """
        Hook for subclasses; not implemented here.
        """
        raise NotImplementedError

    def _validateMarket(self, market):
        """
        Hook for subclasses; not implemented here.
        """
        raise NotImplementedError




class USASession(Session):
    """
        Session for us.etrade.com: logs in through login.fcc and submits
        stock orders to the socreateentry page.
    """

    def __init__(self, username, password):
        """
        Stores the credentials through Session.__init__ and sets up the
        market and column tables (neither table is read by the methods of
        this class).
        """
        Session.__init__(self, username, password)
        self._markets = {'U.S.' : 'A', 'CDN' : 'C'}
        self._columns = ['SYMBOL', 'STRIKE', 'BID', 'ASK', 'LAST', 'VALUE', 'OVER', 'DELTA', 'GAMMA', 'THETA', 'VEGA', 'VOLUME', 'OPENS']

    def doLogin(self):
        """
        Posts the stored credentials to the login page.

        Returns self. Raises LoginFailure when no SMSESSION cookie has been
        stored once the request completed.
        """
        url = "https://us.etrade.com/login.fcc"
        params = {'countrylangselect' : "us_english", 'USER' : self._username, 'PASSWORD' : self._password, 'TARGET' : "/e/t/invest/socreateentry"}
        # getPage form-encodes the dict itself; the response is not needed.
        self.getPage(url, params)
        if not self._cookies.hasCookie('SMSESSION'):
            raise LoginFailure
        return self

    def placeOrder(self, orderType, numberShares, stockSymbol, priceType, price):
        """
        Posts an order to the order entry page.

        orderType -- "1" to buy, "2" to sell
        numberShares -- sent as the quantity field
        stockSymbol -- sent as the symbol field
        priceType -- "1" for a market order, "2" for a limit order
        price -- sent as the limitprice field

        Returns self. Raises InvalidOrderType or InvalidPriceType for values
        other than "1"/"2", and OrderFailure when no SMSESSION cookie is
        stored after the request.
        """
        url = "https://us.etrade.com/e/t/invest/socreateentry"
        # 1 Buy 2 Sell
        if orderType not in ("1", "2"):
            raise InvalidOrderType
        # 1 Market 2 Limit
        if priceType not in ("1", "2"):
            raise InvalidPriceType
        params = {'ordertype' : orderType, 'symbol' : stockSymbol, 'quantity' : numberShares, 'pricetype' : priceType, 'limitprice' : price}
        # Debug output; the parenthesised single-argument form prints the
        # same text as the print statement.
        print("post is")
        print(urllib.urlencode(params))
        p = self.getPage(url, params)
        print("The page real url is")
        print(p.geturl())
        #print p.read()
        # NOTE(review): this only checks that a session cookie exists; it
        # does not inspect the response to see whether the order was accepted.
        if not self._cookies.hasCookie('SMSESSION'):
            raise OrderFailure
        return self
A: 

For logging in and browsing, use mechanize, and for extracting data, use BeautifulSoup.

leoluk
BeautifulSoup is too complicated.
+2  A: 

What you describe is a relatively small task in Python, and I would guess there are not many tutorials about it.

Basically, to retrieve a document (no matter if XML or CSV) from a website you can use urllib2:

import urllib2

# Download the document and close the connection once the body is read.
response = urllib2.urlopen("http://www.example.org/something.xml")
try:
    data = response.read()
finally:
    response.close()

To work with XML you could use ElementTree:

from xml.etree import ElementTree

# Parse the downloaded text; fromstring returns the root element of the tree.
rootelem = ElementTree.fromstring(data)

Now you can inspect the XML tree with the ElementTree API. See the documentation for further information.

To work with CSV you could use the csv module:

import csv

# csv.reader expects an iterable of lines, so split the downloaded text into
# lines first. Line endings are kept (True) so that quoted fields spanning
# several lines are parsed correctly.
csvreader = csv.reader(data.splitlines(True))

You can read the values in a simple for loop. Again see the documentation.

If this doesn't answer your question, please describe in more detail what you would like to achieve.

Noya
Your altruism is legendary. Well done.
jathanism