
Python 3 in practice: fetching data from a website (Carbon Market Data-BJ) (pandas, bs4)

2017-01-08 22:29
Working through some simple hands-on examples as I learn; the program below fetches the data I need from a website.

While writing it I gradually picked up a few techniques, and found Python to be remarkably convenient.

In particular, using pandas to pull table data straight out of a web page is wonderfully easy!

The code may not be polished, but it basically does what I need.

Pointers from more experienced readers are welcome!
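
For anyone who has not used it before: pd.read_html parses every <table> it finds in the input into a DataFrame and returns them as a list. A minimal self-contained illustration (the sample table below is made up for demonstration):

# Minimal pd.read_html demo: it returns one DataFrame per <table> in the input.
import pandas as pd

html = '''<table>
<tr><th>Date</th><th>Volume</th></tr>
<tr><td>2017-01-06</td><td>1000</td></tr>
</table>'''
tables = pd.read_html(html, header=0)  # header=0 uses the first row as column names
print(tables[0])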

Version 04 (Jan 12 2017) [recommended method for extracting table data]

# Code based on Python 3.x
# _*_ coding: utf-8 _*_
# __Author: "LEMON"

import pandas as pd

url2 = 'http://www.bjets.com.cn/article/jyxx/?'
links = []
for n in range(2, 40):
    # The site currently has 39 pages; the count was checked manually,
    # but it could also be scraped from the page (see the sketch below)
    link = url2 + str(n)
    links.append(link)
links.insert(0, url2)

df2 = pd.DataFrame()  # start from an empty DataFrame
for url in links:
    # pandas reads the page's tables directly; requires the html5lib module
    dfs = pd.read_html(url, header=0)
    for df in dfs:
        # note: newer pandas versions replace DataFrame.append with pd.concat
        df2 = df2.append(df, ignore_index=True)

# df2.to_excel('MktDataBJ.xlsx')  # save the data to an Excel file
df2.to_csv('MktDataBJ-1.csv')  # save the data to a CSV file
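
As the comment notes, the page count (39) is hard-coded. It could instead be read from the site's pagination; here is a rough sketch of that idea, assuming the pagination links end in '?<page number>' just like the data pages themselves. That URL pattern is an assumption on my part and would need to be checked against the real markup:

# Sketch: derive the page count from pagination links instead of hard-coding it.
# Assumes pagination hrefs end in '?<number>', e.g. '/article/jyxx/?39' --
# an unverified assumption about the site's markup.
import re
import requests
from bs4 import BeautifulSoup

def get_page_count(first_page_url):
    """Return the highest page number found in pagination-style links."""
    soup = BeautifulSoup(requests.get(first_page_url).content, 'html.parser')
    nums = []
    for a in soup.find_all('a', href=True):
        m = re.search(r'\?(\d+)$', a['href'])  # links like '/article/jyxx/?39'
        if m:
            nums.append(int(m.group(1)))
    return max(nums) if nums else 1

With that helper, the link list could be built as links = [url2] + [url2 + str(n) for n in range(2, get_page_count(url2) + 1)].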


Version 03 (Jan 12 2017)

# Code based on Python 3.x
# _*_ coding: utf-8 _*_
# __Author: "LEMON"

from bs4 import BeautifulSoup
import requests
import csv

url2 = 'http://www.bjets.com.cn/article/jyxx/?'
links = []
for n in range(2, 40):
    # The site currently has 39 pages; the count was checked manually,
    # but it could also be scraped from the page later
    link = url2 + str(n)
    links.append(link)
links.insert(0, url2)

for url in links:
    rep = requests.get(url)
    # content = rep.text.encode(rep.encoding).decode('utf-8')
    # # with requests alone, the Chinese text would need re-encoding

    soup = BeautifulSoup(rep.content, 'html.parser')

    # table = soup.table
    table = soup.find('table')  # either form works

    trs = table.find_all('tr')
    trs2 = trs[1:]  # skip the header row
    list1 = []
    for tr in trs2:
        td = tr.find_all('td')
        row = [i.text for i in td]
        list1.append(row)

    with open('MktDataBJ.csv', 'a', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerows(list1)
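
One gap worth noting: none of these versions writes the table header into the CSV (Version 01 below extracts the <th> texts into title but never saves them). A small sketch that writes the header row once before the scraping loop appends data, assuming, as in the code above, that the first page's table carries its column names in <th> cells:

# Sketch: write the CSV header once, taken from the first page's <th> cells,
# before the main loop appends the data rows.
import csv
import requests
from bs4 import BeautifulSoup

rep = requests.get('http://www.bjets.com.cn/article/jyxx/?')
soup = BeautifulSoup(rep.content, 'html.parser')
header = [th.text for th in soup.find('table').find_all('th')]
with open('MktDataBJ.csv', 'w', errors='ignore', newline='') as f:  # 'w' starts the file fresh
    csv.writer(f).writerow(header)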


Version 02 (Jan 09 2017)

# Code based on Python 3.x
# _*_ coding: utf-8 _*_
# __Author: "LEMON"

from bs4 import BeautifulSoup
import requests
import csv

url2 = 'http://www.bjets.com.cn/article/jyxx/?'
links = []
for n in range(2, 40):
    # The site currently has 39 pages; the count was checked manually,
    # but it could also be scraped from the page later
    link = url2 + str(n)
    links.append(link)
links.insert(0, url2)
# print(links)

for url in links:
    rep = requests.get(url)
    # content = rep.text.encode(rep.encoding).decode('utf-8')
    # # with requests alone, the Chinese text would need re-encoding

    soup = BeautifulSoup(rep.content, 'html.parser')
    body = soup.body
    data = body.find('div', {'class': 'list_right'})

    quotes = data.find_all('tr')
    quotes1 = quotes[1:]  # skip the header row

    list1 = []
    for x in quotes1:
        list2 = []
        for y in x.find_all('td'):
            list2.append(y.text)  # one list per daily record
        list1.append(list2)
    # print(list1)  # list1 holds all daily records
    with open('MktDataBJ.csv', 'a', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerows(list1)
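
The commented-out re-encoding line points at a common requests pitfall: rep.text decodes using the encoding declared in the HTTP headers, which often mislabels Chinese pages, whereas passing the raw rep.content bytes to BeautifulSoup (as done here) lets the parser detect the charset itself. If decoded text is needed directly, requests can also sniff the encoding from the response body; a minimal sketch:

# Sketch: let requests sniff the real encoding instead of trusting the headers.
import requests

rep = requests.get('http://www.bjets.com.cn/article/jyxx/?')
rep.encoding = rep.apparent_encoding  # detect the charset from the content
text = rep.text  # now decoded with the detected charset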


Version 01 (Jan 08 2017)

# Code based on Python 3.x
# _*_ coding: utf-8 _*_
# __Author: "LEMON"

from bs4 import BeautifulSoup
import requests
import csv

urllink = 'http://www.bjets.com.cn/article/jyxx/?'
links = []
for n in range(2, 40):
    # The site currently has 39 pages; the count was checked manually,
    # but it could also be scraped from the page later
    link = urllink + str(n)
    links.append(link)
links.insert(0, urllink)
# print(links)

for url in links:
    rep = requests.get(url)
    # content = rep.text.encode(rep.encoding).decode('utf-8')
    # # with requests alone, the Chinese text would need re-encoding

    soup = BeautifulSoup(rep.content, 'html.parser')
    # print(soup.prettify())

    body = soup.body
    data = body.find('div', {'class': 'list_right'})

    # table header cells
    titles = data.find_all('th')
    title = []
    for x in titles:
        title.append(x.text)
    # print(title)

    quotes = data.find_all('tr')
    quotes1 = quotes[1:]  # skip the header row
    # print(quotes1)

    list1 = []
    for x in quotes1:
        for y in x.find_all('td'):
            list1.append(y.text)
    # print(list1)  # flat list of every cell on the page

    date = []
    volumes = []
    meanprice = []
    totalmoney = []

    # split the flat list into four columns by cell position
    for i in range(0, len(list1)):
        if i % 4 == 0:
            date.append(list1[i])
        elif i % 4 == 1:
            volumes.append(list1[i])
        elif i % 4 == 2:
            meanprice.append(list1[i])
        else:
            totalmoney.append(list1[i])

    # print(date)
    # print(volumes)
    # print(meanprice)
    # print(totalmoney)

    final = []
    for i in range(0, len(date)):
        temp = [date[i], volumes[i], meanprice[i], totalmoney[i]]
        final.append(temp)
    # print(final)
    with open('bj_carbon.csv', 'a', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerows(final)
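
The modulo-4 bookkeeping above can be collapsed into a single slicing step: the flat cell list regroups into rows of four directly (a sketch, assuming every table row contributes exactly four <td> cells):

# Sketch: regroup the flat cell list into rows of four
# (date, volume, mean price, turnover) in one step.
final = [list1[i:i + 4] for i in range(0, len(list1), 4)]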