course:python:lesson11

# 第十一课 经济，金融数据应用

• Python当中的matplotlib module有一个finance module能够获取各公司的股票历史数据并绘图。
from pylab import figure, show
from matplotlib.finance import quotes_historical_yahoo
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
import datetime

# Date range of the quotes to download.
date1 = datetime.date(2013, 1, 1)
date2 = datetime.date(2013, 11, 11)

# Tick-label format for the x axis (month-day-year).
daysFmt = DateFormatter('%m-%d-%Y')

# Fetch the Microsoft (MSFT) quotes between date1 and date2.
quotes = quotes_historical_yahoo('MSFT', date1, date2)
# The fetcher returns None on failure or when nothing was parsed, so test
# truthiness rather than len() (len(None) would raise TypeError).
if not quotes:
    raise SystemExit

# Each quote is a tuple (d, open, close, high, low, volume); keep the
# date number and the opening price.
dates = [q[0] for q in quotes]
opens = [q[1] for q in quotes]

fig = figure()
ax = fig.add_subplot(111)
ax.plot_date(dates, opens, '-')

# format the ticks
ax.xaxis.set_major_formatter(daysFmt)
ax.autoscale_view()


# format the coords message box
def price(x):
    """Format a y value as a dollar amount with two decimals."""
    return '$%1.2f' % x


ax.fmt_xdata = DateFormatter('%Y-%m-%d')
ax.fmt_ydata = price
ax.grid(True)

# Rotate and align the date labels so they do not overlap.
fig.autofmt_xdate()
show()
• quotes_historical_yahoo是一个获取yahoo历史数据的函数，需要输入公司的Ticker Symbol和查询起止日期；它先通过fetch_historical_yahoo取得数据文件句柄，再交给parse_yahoo_historical解析并返回行情数据，具体代码如下：
def quotes_historical_yahoo(ticker, date1, date2, asobject=False,
                            adjusted=True, cachename=None):
    """
    Get historical data for ticker between date1 and date2.  date1 and
    date2 are datetime instances or (year, month, day) sequences.

    See :func:`parse_yahoo_historical` for explanation of output formats
    and the *asobject* and *adjusted* kwargs.

    Returns None when no rows could be parsed or when reading the data
    raises IOError (a warning is emitted in the latter case).

    Ex:
    sp = f.quotes_historical_yahoo('^GSPC', d1, d2,
                                   asobject=True, adjusted=True)
    returns = (sp.open[1:] - sp.open[:-1])/sp.open[1:]
    [n,bins,patches] = hist(returns, 100)
    mu = mean(returns)
    sigma = std(returns)
    x = normpdf(bins, mu, sigma)
    plot(bins, x, color='red', lw=2)

    cachename is the name of the local file cache.  If None, will
    default to the md5 hash of the url (which incorporates the ticker
    and date range)
    """
    # Maybe enable a warning later as part of a slow transition
    # to using None instead of False.
    #if asobject is False:
    #    warnings.warn("Recommend changing to asobject=None")

    fh = fetch_historical_yahoo(ticker, date1, date2, cachename)

    try:
        ret = parse_yahoo_historical(fh, asobject=asobject,
                                     adjusted=adjusted)
        if len(ret) == 0:
            return None
    except IOError as exc:
        # Best effort: report the failure and signal "no data" to the caller.
        warnings.warn('fh failure\n%s' % (exc.strerror))
        return None

    return ret
• parse_yahoo_historical函数可对历史数据进行解析，读取文件，对文件部分内容进行操作，代码如下：
def parse_yahoo_historical(fh, adjusted=True, asobject=False):
    """
    Parse the historical data in file handle fh from yahoo finance.

    *adjusted*
      If True (default) replace open, close, high, and low prices with
      their adjusted values. The adjustment is by a scale factor, S =
      adjusted_close/close. Adjusted prices are actual prices
      multiplied by S.

      Volume is left untouched by this function. If you want to compute
      dollars traded, multiply volume by the adjusted close, regardless
      of whether you choose adjusted = True|False.

    *asobject*
      If False (default for compatibility with earlier versions)
      return a list of tuples containing

        d, open, close, high, low, volume

      If None (preferred alternative to False), return
      a 2-D ndarray corresponding to the list of tuples.

      Otherwise return a numpy recarray with

        date, year, month, day, d, open, close, high, low,
        volume, aclose

      where d is a floating point representation of date,
      as returned by date2num, and date is a python standard
      library datetime.date instance.

      The name of this kwarg is a historical artifact.  Formerly,
      True returned a cbook Bunch
      holding 1-D ndarrays.  The behavior of a numpy recarray is
      very similar to the Bunch.

    """
    lines = fh.readlines()

    results = []

    # The first line is the CSV header, so skip it.
    for line in lines[1:]:
        vals = line.split(',')
        if len(vals) != 7:
            continue  # malformed row: skip it

        datestr = vals[0]
        # Splitting on '-' instead of calling strptime: strptime doubles
        # the runtime and the present date format does not need it.
        dt = datetime.date(*[int(val) for val in datestr.split('-')])
        dnum = date2num(dt)
        # Columns 1-4 arrive as open, high, low, close.
        open_, high, low, close = [float(val) for val in vals[1:5]]
        volume = float(vals[5])
        aclose = float(vals[6])

        # Stored order is open, close, high, low (differs from column order).
        results.append((dt, dt.year, dt.month, dt.day,
                        dnum, open_, close, high, low, volume, aclose))

    # The loop collected rows in file order; flip them.
    results.reverse()
    d = np.array(results, dtype=stock_dt)

    if adjusted:
        scale = d['aclose'] / d['close']
        # A zero close gives an infinite factor; mark those rows as NaN.
        scale[np.isinf(scale)] = np.nan
        d['open'] *= scale
        d['close'] *= scale
        d['high'] *= scale
        d['low'] *= scale

    if not asobject:
        # 2-D sequence; formerly list of tuples, now ndarray
        # (builtin float replaces the removed np.float alias)
        ret = np.zeros((len(d), 6), dtype=float)
        ret[:, 0] = d['d']
        ret[:, 1] = d['open']
        ret[:, 2] = d['close']
        ret[:, 3] = d['high']
        ret[:, 4] = d['low']
        ret[:, 5] = d['volume']
        if asobject is None:
            return ret
        return [tuple(row) for row in ret]

    return d.view(np.recarray)  # Close enough to former Bunch return
• 另外，如果无需操作历史数据，只需将其下载并存储到本地文件，可参考下面的代码：
# This example downloads historical quotes from finance.yahoo.com and
# saves them to a local file.

import os
import urllib
import urllib2

ticker = 'MSFT'           # the ticker symbol
date1 = (2012, 1, 1)      # beginning date as (year, month, day)
date2 = (2012, 11, 11)    # ending date as (year, month, day)

# Reorder each date to (month - 1, day, year), the order in which the
# URL template below consumes the values; the month is sent zero-based.
d1 = (date1[1] - 1, date1[2], date1[0])
d2 = (date2[1] - 1, date2[2], date2[0])

g = 'd'  # value of the "g" query parameter (presumably daily data; confirm)

urlFmt = 'http://table.finance.yahoo.com/table.csv?a=%d&b=%d&c=%d&d=%d&e=%d&f=%d&s=%s&y=0&g=%s&ignore=.csv'
url = urlFmt % (d1 + d2 + (ticker, g))  # the url of the historical data
print(url)

path = r'C:\Users\yinyao\Desktop\Python code'  # saving directory
# No leading backslash: os.path.join would otherwise treat the file name
# as rooted and throw away the directory part of `path`.
file_name = 'ticker.csv'                       # file name
dest_dir = os.path.join(path, file_name)       # full path of the target file
urllib.urlretrieve(url, dest_dir)              # download the data into that file
• course/python/lesson11.txt
• 最后更改: 2014/05/11 20:47
• (外部编辑)