python爬虫之爬取CQU毕业设计网批量获取数据
2018-03-02 15:16
525 查看
数据在网页源代码中,使用正则表达式匹配数据。

import requests
import csv
import re
import time
def cqu_login(spyder, ues_name, pass_word):
    """Submit the student login form of the CQU graduation-project site.

    Args:
        spyder: Session-like object exposing
            ``post(url, data=..., headers=..., timeout=...)``.
        ues_name: Account id, sent as the ``id`` form field.
        pass_word: Password, sent as the ``pwd`` form field.

    Returns:
        The response object returned by ``spyder.post``, with its
        ``encoding`` attribute set to ``"utf-8"``. Callers may ignore it.
    """
    url = "http://bysj.cqu.edu.cn/bysj/login.htm"
    data = {
        "id": ues_name,
        "pwd": pass_word,
        "type": "student",
        "btlogin": "登陆",
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36"
    }
    # Without a timeout an unresponsive server would block this call forever.
    response = spyder.post(url, data=data, headers=headers, timeout=10)
    response.encoding = "utf-8"
    return response
def get_data(spyder):
    """Fetch the student-information page and return its body as text.

    Args:
        spyder: Session-like object exposing ``get(url, timeout=...)``.
            NOTE(review): presumably the same session that was used to log
            in, so the request is authenticated; confirm against the caller.

    Returns:
        The ``text`` attribute of the response, after its ``encoding`` has
        been set to ``"utf-8"``.
    """
    url = "http://bysj.cqu.edu.cn/bysj/student/viewStudentPage.htm"
    # Without a timeout an unresponsive server would block this call forever.
    response = spyder.get(url, timeout=10)
    response.encoding = "utf-8"
    return response.text
def analysis_data(data):
    """Pull table-cell text out of the page HTML with a regular expression.

    ``.`` does not match a newline, so the pattern works line by line and
    yields at most one capture per line: the text between the last
    ``</td>`` on that line and the nearest ``>`` before it. Lines without
    ``</td>`` contribute nothing.

    Args:
        data: HTML source as a string.

    Returns:
        A list of the captured strings, in document order. Whitespace inside
        the capture is kept as-is.
    """
    return re.findall(r".*>(.*)</td>.*", data)
def save_data(result, use_name):
    """Write the extracted strings to ``<use_name>学生信息表.csv``.

    The first element of ``result`` is ignored. From index 1 onward the
    elements are taken alternately as labels (odd indices) and values (even
    indices); trailing whitespace is stripped from each.

    The file is opened with ``newline=""`` as the ``csv`` module requires
    (otherwise blank rows appear on Windows) and with an explicit
    ``utf-8-sig`` encoding, so the Chinese text is written the same way on
    every platform and spreadsheet programs can detect the encoding.

    Args:
        result: Sequence of strings, e.g. the list returned by
            ``re.findall``.
        use_name: Prefix of the output file name (may include a directory).
    """
    list_key = [item.rstrip() for item in result[1::2]]
    list_value = [item.rstrip() for item in result[2::2]]
    with open(use_name + "学生信息表.csv", "w", newline="", encoding="utf-8-sig") as csvfile:
        writer = csv.writer(csvfile)
        # Header row first.
        writer.writerow(["学生信息", "学生数据"])
        # NOTE(review): this writes one row of all labels and one row of all
        # values, which does not line up with the two-column header above.
        # Layout kept as-is for existing consumers; confirm what is intended.
        writer.writerows([list_key, list_value])
def get_many_data(pass_word):
    """Log in as each id in a fixed range and save that account's page data.

    For every integer id from 20146350 up to (not including) 20146450 this
    opens a new session, logs in with ``pass_word``, downloads the student
    page, extracts the cell text and writes it to a per-id CSV file, then
    sleeps one second before the next id.

    NOTE(review): this signs in to many accounts with one shared password
    and stores their personal data. Confirm that the operator is authorised
    to access every account in the range before running it.

    Args:
        pass_word: Password submitted for every id in the range.
    """
    for i in range(20146350, 20146450):
        print("正在获取" + str(i) + "的数据...")
        # One session per account; the with-block closes it (and its
        # pooled connections) instead of leaking one per iteration.
        with requests.session() as spyder:
            cqu_login(spyder, str(i), pass_word)
            data = get_data(spyder)
        result = analysis_data(data)
        save_data(result, str(i))
        # Pause between accounts to avoid hammering the server.
        time.sleep(1)
# Script entry point: ask for the password on stdin, then run the batch.
if __name__ == '__main__':
    pass_word = input("请输入密码:")
    get_many_data(pass_word)
import csv
import re
import time
def cqu_login(spyder, ues_name, pass_word):
    """Submit the student login form of the CQU graduation-project site.

    Args:
        spyder: Session-like object exposing
            ``post(url, data=..., headers=..., timeout=...)``.
        ues_name: Account id, sent as the ``id`` form field.
        pass_word: Password, sent as the ``pwd`` form field.

    Returns:
        The response object returned by ``spyder.post``, with its
        ``encoding`` attribute set to ``"utf-8"``. Callers may ignore it.
    """
    url = "http://bysj.cqu.edu.cn/bysj/login.htm"
    data = {
        "id": ues_name,
        "pwd": pass_word,
        "type": "student",
        "btlogin": "登陆",
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36"
    }
    # Without a timeout an unresponsive server would block this call forever.
    response = spyder.post(url, data=data, headers=headers, timeout=10)
    response.encoding = "utf-8"
    return response
def get_data(spyder):
    """Fetch the student-information page and return its body as text.

    Args:
        spyder: Session-like object exposing ``get(url, timeout=...)``.
            NOTE(review): presumably the same session that was used to log
            in, so the request is authenticated; confirm against the caller.

    Returns:
        The ``text`` attribute of the response, after its ``encoding`` has
        been set to ``"utf-8"``.
    """
    url = "http://bysj.cqu.edu.cn/bysj/student/viewStudentPage.htm"
    # Without a timeout an unresponsive server would block this call forever.
    response = spyder.get(url, timeout=10)
    response.encoding = "utf-8"
    return response.text
def analysis_data(data):
    """Pull table-cell text out of the page HTML with a regular expression.

    ``.`` does not match a newline, so the pattern works line by line and
    yields at most one capture per line: the text between the last
    ``</td>`` on that line and the nearest ``>`` before it. Lines without
    ``</td>`` contribute nothing.

    Args:
        data: HTML source as a string.

    Returns:
        A list of the captured strings, in document order. Whitespace inside
        the capture is kept as-is.
    """
    return re.findall(r".*>(.*)</td>.*", data)
def save_data(result, use_name):
    """Write the extracted strings to ``<use_name>学生信息表.csv``.

    The first element of ``result`` is ignored. From index 1 onward the
    elements are taken alternately as labels (odd indices) and values (even
    indices); trailing whitespace is stripped from each.

    The file is opened with ``newline=""`` as the ``csv`` module requires
    (otherwise blank rows appear on Windows) and with an explicit
    ``utf-8-sig`` encoding, so the Chinese text is written the same way on
    every platform and spreadsheet programs can detect the encoding.

    Args:
        result: Sequence of strings, e.g. the list returned by
            ``re.findall``.
        use_name: Prefix of the output file name (may include a directory).
    """
    list_key = [item.rstrip() for item in result[1::2]]
    list_value = [item.rstrip() for item in result[2::2]]
    with open(use_name + "学生信息表.csv", "w", newline="", encoding="utf-8-sig") as csvfile:
        writer = csv.writer(csvfile)
        # Header row first.
        writer.writerow(["学生信息", "学生数据"])
        # NOTE(review): this writes one row of all labels and one row of all
        # values, which does not line up with the two-column header above.
        # Layout kept as-is for existing consumers; confirm what is intended.
        writer.writerows([list_key, list_value])
def get_many_data(pass_word):
    """Log in as each id in a fixed range and save that account's page data.

    For every integer id from 20146350 up to (not including) 20146450 this
    opens a new session, logs in with ``pass_word``, downloads the student
    page, extracts the cell text and writes it to a per-id CSV file, then
    sleeps one second before the next id.

    NOTE(review): this signs in to many accounts with one shared password
    and stores their personal data. Confirm that the operator is authorised
    to access every account in the range before running it.

    Args:
        pass_word: Password submitted for every id in the range.
    """
    for i in range(20146350, 20146450):
        print("正在获取" + str(i) + "的数据...")
        # One session per account; the with-block closes it (and its
        # pooled connections) instead of leaking one per iteration.
        with requests.session() as spyder:
            cqu_login(spyder, str(i), pass_word)
            data = get_data(spyder)
        result = analysis_data(data)
        save_data(result, str(i))
        # Pause between accounts to avoid hammering the server.
        time.sleep(1)
# Script entry point: ask for the password on stdin, then run the batch.
if __name__ == '__main__':
    pass_word = input("请输入密码:")
    get_many_data(pass_word)
相关文章推荐
- Python 爬虫 PhantomJs 获取JS动态数据
- Python爬虫实战--(三)获取网页中的动态数据
- Python爬虫实践:获取空气质量历史数据
- python爬虫获取郑大教务在线成绩数据
- Python实现简单的爬虫获取某刀网的更新数据
- 【原创】python爬虫获取网站数据并存入本地数据库
- python爬虫获取数据后存入MySQL数据库中
- 【爬虫】Python2爬虫代码之获取金融品种行情数据
- Python爬虫:获取链家,搜房,大众点评的数据
- 爬虫小探-Python3 urllib.request获取页面数据
- selenium自动化测试工具开发python爬虫-动态加载页面数据获取
- Python 爬虫 PhantomJs 获取JS动态数据
- python 爬虫获取json数据存入文件时乱码
- python爬虫登录正方教务管理系统获取成绩数据
- Python爬虫之CQU就业网数据
- 复杂的网页爬虫,python获取网页指的格式数据
- 编写python爬虫 获取中华英才网全网工资数据
- 利用python爬虫技术获取每天每场的每位球员NBA数据以及每日范特西评分
- python3的爬虫算法(1)--获取网页数据
- Python爬虫学习,记一次抓包获取js,从js函数中取数据的过程