您的位置:首页 > 编程语言 > Python开发

Python 爬取数据，并写入数据库

2018-02-08 17:29 211 查看
# coding=utf-8

import json
import requests
from bs4 import BeautifulSoup

import db

def get_data(url, timeout=10):
    """Fetch a page and return its body as a single line of text.

    The page is requested with an HTTP POST using a desktop Chrome
    User-Agent, decoded as UTF-8, and stripped of all carriage returns
    and line feeds.

    NOTE(review): ``verify=False`` disables TLS certificate verification.
    It is kept because the original code relied on it, but it should be
    removed if the target host presents a valid certificate.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout the request could block forever.

    Returns:
        The response text with every ``\\n`` and ``\\r`` removed.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
    }
    response = requests.post(url, headers=headers, verify=False, timeout=timeout)
    response.encoding = 'utf-8'
    # Drop both line-break characters in one pass.
    return response.text.translate(str.maketrans('', '', '\r\n'))

def writeData(listData):
    """Store the scraped records in the database.

    This is a thin wrapper that hands the records to ``db.dbInsert``.

    Args:
        listData: The records to store, passed through unchanged.
    """
    db.dbInsert(listData)

def analyzeData(html):
    """Extract link/text pairs from every ``div.p-name`` element of a page.

    Args:
        html: HTML text of the page to parse.

    Returns:
        A list of dicts, one per ``div.p-name`` element that contains an
        ``<a>`` tag. Each dict has:
          * ``'key'``: the ``href`` of the first ``<a>`` inside the element
            (``None`` when that anchor has no ``href`` attribute);
          * ``'value'``: the element's text with surrounding whitespace
            stripped.
    """
    soup = BeautifulSoup(html, 'lxml')
    listData = []
    for content in soup.select('div.p-name'):
        link = content.select_one('a')
        if link is None:
            # No anchor means there is no key for this entry; skip it
            # rather than fail on an empty selection.
            continue
        listData.append({
            'key': link.attrs.get('href'),
            'value': content.text.strip(),
        })
    return listData

# Walk through the result pages one by one, storing each page's records,
# until a page is empty or repeats the previous page's last entry.
page = 0
lastKey = ''
while True:
    # Advance to the next page.
    page = page + 1
    # NOTE(review): the URL is blank in this listing; presumably it should be
    # built from `page`. Fill it in before running.
    url = ""
    data = get_data(url)
    listData = analyzeData(data)
    if not listData:
        # A page without any matching entries: nothing more to collect.
        break
    lastKeyTemp = listData[-1].get('key')
    if lastKeyTemp == lastKey:
        # Same final entry as the previous page, so the pages have started
        # repeating; stop before storing duplicates.
        break
    lastKey = lastKeyTemp
    writeData(listData)
# NOTE(review): this is the content of the `db` module that the scraper
# imports (`import db` / `db.dbInsert`); presumably it lives in its own
# file, db.py.
import logging

import psycopg2

logger = logging.getLogger(__name__)


def dbInsert(listData):
    """Insert records into the ``t_data`` table, one transaction per row.

    Rows are inserted on a best-effort basis: a row that the database
    rejects is rolled back, logged, and skipped, and the remaining rows
    are still attempted.

    NOTE(review): the connection settings are blank in this listing and
    must be filled in before use.

    Args:
        listData: Iterable of mappings, each providing a ``'key'`` and a
            ``'value'`` entry to store in the columns of the same names.
    """
    conn = psycopg2.connect(database='', user='', password='', host='', port='5432')
    try:
        with conn.cursor() as cur:
            for data in listData:
                try:
                    # Pass the values as parameters so that quotes or SQL
                    # fragments in scraped text cannot alter the statement.
                    cur.execute(
                        'INSERT INTO t_data ("key", "value") VALUES (%s, %s)',
                        (data['key'], data['value']),
                    )
                    conn.commit()
                except psycopg2.Error:
                    # A failed statement leaves the transaction aborted;
                    # roll back so the following rows can still be inserted.
                    conn.rollback()
                    logger.warning(
                        'Skipping row with key %r', data['key'], exc_info=True
                    )
    finally:
        # Always release the connection, even on an unexpected error.
        conn.close()
                                            
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  python