您的位置:首页 > 编程语言 > Python开发

python爬虫之爬取CQU毕业设计网批量获取数据

2018-03-02 15:16 525 查看
数据在网页源代码中,使用正则表达式匹配数据。

import requests
import csv
import re
import time

def cqu_login(spyder, ues_name, pass_word, timeout=10):
    """Submit the student login form of the CQU graduation-project site.

    The form is posted through ``spyder`` so that whatever state the
    session object keeps from the response is available to later requests
    made with the same object.

    Args:
        spyder: Object with a ``requests.Session``-style ``post`` method.
        ues_name: Account id, sent as the ``id`` form field.
        pass_word: Account password, sent as the ``pwd`` form field.
        timeout: Seconds to wait for the server before the request is
            abandoned; passed straight to ``spyder.post``.

    Returns:
        The response object returned by ``spyder.post``, with its
        ``encoding`` set to ``"utf-8"``, so the caller can check whether
        the login was accepted.
    """
    url = "http://bysj.cqu.edu.cn/bysj/login.htm"
    form = {
        "id": ues_name,
        "pwd": pass_word,
        "type": "student",
        "btlogin": "登陆",
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36"
    }

    # Without a timeout a stalled server would block this call indefinitely.
    response = spyder.post(url, data=form, headers=headers, timeout=timeout)
    response.encoding = "utf-8"
    return response

def get_data(spyder, timeout=10):
    """Download the student-information page and return its body as text.

    Args:
        spyder: Object with a ``requests.Session``-style ``get`` method;
            presumably one that has already been passed to ``cqu_login``.
        timeout: Seconds to wait for the server before the request is
            abandoned; passed straight to ``spyder.get``.

    Returns:
        The response's ``text`` attribute, read after the response's
        ``encoding`` has been set to ``"utf-8"``.
    """
    url = "http://bysj.cqu.edu.cn/bysj/student/viewStudentPage.htm"
    # Without a timeout a stalled server would block this call indefinitely.
    response = spyder.get(url, timeout=timeout)
    response.encoding = "utf-8"
    return response.text

def analysis_data(data):
    """Pull table-cell text out of the page HTML with a regular expression.

    ``.`` does not match a newline and every ``.*`` in the pattern is
    greedy, so each line of ``data`` yields at most one match: the text
    between the last ``>`` that precedes the line's final ``</td>`` and
    that ``</td>``. Lines without ``</td>`` contribute nothing, and when a
    line holds several cells only the last one is captured.

    Args:
        data: HTML source as a string.

    Returns:
        A list of the captured strings in document order (trailing
        whitespace is not stripped here).
    """
    return re.findall(r".*>(.*)</td>.*", data)

def save_data(result, use_name, encoding="utf-8-sig"):
    """Write the extracted label/value pairs to ``<use_name>学生信息表.csv``.

    ``result[0]`` is ignored. From index 1 onward the entries are treated
    as alternating label, value, label, value, ...; trailing whitespace is
    stripped from each. The file starts with the two-column header row and
    then holds one ``label, value`` row per pair, so the data lines up
    with the header.

    Args:
        result: Sequence of strings as returned by ``analysis_data``.
        use_name: Prefix for the output file name (may include a directory).
        encoding: Text encoding of the CSV file. The default writes UTF-8
            with a BOM so the Chinese text does not depend on the
            platform's locale encoding.
    """
    labels = [item.rstrip() for item in result[1::2]]
    values = [item.rstrip() for item in result[2::2]]
    # An odd number of entries leaves the last label without a value;
    # pad so that label still gets its own row instead of being dropped.
    values.extend([""] * (len(labels) - len(values)))

    # newline="" is required by the csv module; without it Windows output
    # gets an extra blank line after every row.
    with open(use_name + "学生信息表.csv", "w", newline="", encoding=encoding) as csvfile:
        writer = csv.writer(csvfile)
        # Header row first, then one row per label/value pair.
        writer.writerow(["学生信息", "学生数据"])
        writer.writerows(zip(labels, values))

def get_many_data(pass_word):
    """Log in, fetch, parse and save the information page for a run of ids.

    Iterates over the ids 20146350 up to (not including) 20146450. For
    each id a new session is created, the login form is submitted with
    ``pass_word``, the information page is downloaded, parsed and written
    to a CSV file named after the id. The loop pauses one second between
    ids.

    NOTE(review): this submits one shared password for 100 different
    accounts and stores each account's personal page. Run it only against
    accounts you are explicitly authorised to access.

    Args:
        pass_word: Password submitted for every id in the range.
    """
    for student_id in range(20146350, 20146450):
        print("正在获取"+str(student_id)+"的数据...")
        # The context manager closes the session (and its pooled
        # connections) once the page for this id has been downloaded.
        with requests.session() as spyder:
            cqu_login(spyder, str(student_id), pass_word)
            data = get_data(spyder)
        result = analysis_data(data)
        save_data(result, str(student_id))
        # Pause between ids to avoid sending requests back to back.
        time.sleep(1)

# Script entry point: prompt for the password on the console, then start
# the batch run. Nothing executes when the module is imported.
if __name__ == '__main__':
    pass_word = input("请输入密码:")
    get_many_data(pass_word)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: