您的位置:首页 > 其它

Download Chinese Stock Data from Sina Finance

2015-04-16 11:06 288 查看
import numpy as np
import pandas as pd
from urllib2 import urlopen
from bs4 import BeautifulSoup

def get_year_range(code):
    """Return the years offered by Sina Finance's history page for a stock.

    Fetches the stock's market-history page and reads the ``<option>``
    entries of its ``<select name="year">`` drop-down.

    Args:
        code: Stock code substituted into the page URL.

    Returns:
        A list holding the ``.string`` of every option tag, in page order.

    Raises:
        IndexError: If the page contains no ``<select name="year">`` element.
    """
    url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/%s.phtml' % code
    response = urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()
    soup = BeautifulSoup(content)
    year_select = soup.findAll('select', attrs={'name': 'year'})[0]
    return [option.string for option in year_select.findAll('option')]

def get_data(code):
    """Download a stock's quarterly price tables and save them as one CSV.

    For every year reported by ``get_year_range`` and every quarter 1-4, the
    corresponding Sina Finance history page is fetched and the rows of its
    ``FundHoldSharesTable`` table are collected. Quarters that cannot be
    fetched or parsed are skipped. The combined result is written to
    ``./data/price_<code>.csv`` without the index column.

    Args:
        code: Stock code; converted with ``str`` wherever text is needed.

    Returns:
        None. The result is the CSV file written to disk.
    """
    code_str = str(code)
    columns = ['Date', 'Open', 'Close', 'High', 'Low', 'ShareVolume', 'DollarVolume']
    # Table cell to read for each output column above. The page lists
    # open, high, close, low (cells 1-4), while the CSV stores Close before
    # High, hence the swapped 3 and 2.
    cell_for_column = [0, 1, 3, 2, 4, 5, 6]

    frames = []
    for year in get_year_range(code):
        for season in range(1, 5):
            try:
                url = ('http://vip.stock.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/'
                       + code_str + '.phtml?year=' + year + '&jidu=' + str(season))
                rsp = urlopen(url)
                try:
                    html = rsp.read()
                finally:
                    # Always release the connection, even on a failed read.
                    rsp.close()
                soup = BeautifulSoup(html, from_encoding='GB2312')
                table = soup.find_all('table', attrs={'id': 'FundHoldSharesTable'})[0]

                records = []
                # The first two <tr> rows are skipped, as in the original
                # (presumably title and column-header rows -- confirm).
                for row in table.findAll('tr')[2:]:
                    cells = row.findAll('td')
                    records.append([cells[idx].get_text(strip=True)
                                    for idx in cell_for_column])
            except Exception:
                # Best effort: a quarter that is missing or malformed is
                # skipped. Catching Exception rather than using a bare
                # ``except`` lets KeyboardInterrupt/SystemExit stop the run.
                continue
            frames.append(pd.DataFrame(records, columns=columns))

    # Concatenate once instead of growing a frame with DataFrame.append,
    # which copies on every call and no longer exists in pandas 2.x.
    # pd.concat rejects an empty list, so fall back to an empty frame.
    df = pd.concat(frames) if frames else pd.DataFrame()

    # str(code) is required here: concatenating a non-string code used to
    # raise TypeError before the CSV was written.
    print('Finish All: ' + code_str + '!')
    filedestiny = './data/price_' + code_str + '.csv'
    df.to_csv(filedestiny, encoding='utf-8', index=False)

# Driver script: read the stock list, normalise the codes to six digits and
# download the price history of each selected stock.
ratingdata = pd.read_csv('stocklist.csv')

# Drop missing codes on the Series itself: np.unique() returns a plain
# ndarray, which has no dropna(). sorted() keeps the ascending order that
# np.unique() used to provide.
stockcode = [str(int(raw_code)).zfill(6)
             for raw_code in sorted(ratingdata.Code.dropna().unique())]

# NOTE(review): the hard-coded offset skips the first 1932 codes --
# presumably to resume an earlier interrupted run; confirm before reuse.
stockcodesub = stockcode[1932:]

for code in stockcodesub:
    get_data(code)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: