您的位置:首页 > 编程语言 > Python开发

python 爬虫爬取几十家门店在美团外卖上的排名,并插入数据库,最后在前端显示

2016-08-04 12:20 666 查看
爬虫脚本:

#!/usr/bin/env python
# encoding: utf-8

"""
@version: ??
@author: phpergao
@license: Apache Licence
@file: meituan_paiming.py
@time: 2016/8/1 15:16
"""

import urllib,json,re
import urllib.parse
import http.cookiejar
import urllib.request,datetime,time,SQL
from multiprocessing import Process
import collections

def main(store_name,paiming):
    """
    Insert one ranking record into dbo.meituan_paiming.

    store_name -- text written to the store_name column.
    paiming    -- ranking value written to the paiming (int) column.

    The dingwei_address column is filled with a single space and
    updatetime with the current local time ("YYYY-MM-DD HH:MM:SS").
    """
    # NOTE(review): host and credentials are hard-coded; consider moving
    # them into configuration.
    ms = SQL.MSSQL(host='192.168.72.172',user="stdservice",pwd="7数据库密码",db="stddata")

    # One clock read for date and time so the two parts cannot straddle midnight.
    sj = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # The statement is assembled as text, so a single quote inside the
    # store name would end the SQL literal early (broken statement or SQL
    # injection). Doubling it is the T-SQL escape for a quote in a literal.
    safe_name = str(store_name).replace("'", "''")
    # Coerce to int so nothing but a number can reach the int column.
    rank = int(paiming)

    ms.ExecNonQuery('''
    INSERT INTO dbo.meituan_paiming

    VALUES  ( '{}' , -- store_name - char(20)
    '{}' , -- paiming - int
    ' ' , -- dingwei_address - char(500)
    '{}'  -- updatetime - char(50)
    )
    '''.format(safe_name, rank, sj))

def paiming(url):
    """
    Fetch one listing page and store the rank of every matching restaurant.

    The page at `url` is downloaded, restaurant titles are pulled out of
    the `data-title` attribute of each `class="restaurant"` div in page
    order, and every title containing "72" is printed and passed to
    main() together with its 1-based position in that list.
    """
    # A single opener carries both the cookie jar and the browser-like
    # User-Agent. Previously the header was attached to a second opener
    # that was never used, so requests went out without it.
    cj = http.cookiejar.LWPCookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cj),
        urllib.request.HTTPHandler,
    )
    User_Agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"
    opener.addheaders = [('User-Agent', User_Agent)]
    urllib.request.install_opener(opener)

    # A timeout keeps a stalled connection from hanging the worker forever.
    with opener.open(url, timeout=30) as response:
        ret = response.read()

    html = re.findall(r'''<div data-title="(.*?)" data-bulletin=".*?\n*?.*?" data-poiid=".*?" class="restaurant" data-all=".*?"''', str(ret, 'utf8'))

    today = datetime.date.today()
    tm = time.strftime("%H:%M:%S")
    sj = "{} {}".format(str(today), tm)

    # The position in the result list is the rank on the page.
    for a, i in enumerate(html, start=1):
        if "72" in i:
            print(i, a, sj)
            main(i, a)
    # NOTE(review): the original indentation was lost; "ok" is assumed to
    # be printed once after the whole page has been processed - confirm.
    print("ok")
# Listing pages to check, one per location; each entry is handed to
# paiming() by the script entry point below.
# NOTE(review): the last path segment looks like a geohash of the delivery
# address - confirm.
urllist = [
"http://waimai.meituan.com/home/ws0e9gmds0u7",
"http://waimai.meituan.com/home/ws0edu48zvm8",
"http://waimai.meituan.com/home/ws0e6v9brqq8",
"http://waimai.meituan.com/home/ws0ec83j0fbm",
"http://waimai.meituan.com/home/ws0efv4veqks",
"http://waimai.meituan.com/home/ws0edg19uxt6",
"http://waimai.meituan.com/home/ws0esdnh56um",
"http://waimai.meituan.com/home/ws0e937zdbph",
"http://waimai.meituan.com/home/ws0e3rfy1wxb",
"http://waimai.meituan.com/home/ws0eddzptu5e",
"http://waimai.meituan.com/home/ws0eeh52gdry",
"http://waimai.meituan.com/home/ws0e7jmpm28g",
"http://waimai.meituan.com/home/ws0e9pqmfr47",
"http://waimai.meituan.com/home/ws0edyvqgmrt",
"http://waimai.meituan.com/home/ws0edjnftj0h",
"http://waimai.meituan.com/home/ws0e3u0fb2gx",
"http://waimai.meituan.com/home/ws0ed9dq6x1f",
"http://waimai.meituan.com/home/ws0eehrxexqc",
"http://waimai.meituan.com/home/ws0dec6tzjwm",
"http://waimai.meituan.com/home/ws0e3txh5ym6",
"http://waimai.meituan.com/home/ws0ecqzp6n82",
"http://waimai.meituan.com/home/ws0e4g1dxshy",
"http://waimai.meituan.com/home/ws0g8ejh80rp",
"http://waimai.meituan.com/home/ws0ec37vje4d",
"http://waimai.meituan.com/home/ws0eg711k1t1",
"http://waimai.meituan.com/home/ws0dgmq924yy",
"http://waimai.meituan.com/home/ws0cff7x3m4u",
"http://waimai.meituan.com/home/ws0cg5zd5g4y",
"http://waimai.meituan.com/home/ws0ghenxxz82",
"http://waimai.meituan.com/home/ws0cfvkuzvtk",
"http://waimai.meituan.com/home/ws14dtvjhqm4",
"http://waimai.meituan.com/home/ws100stcewjn",
"http://waimai.meituan.com/home/ws104zssdsyp",
"http://waimai.meituan.com/home/ws102hkctrhh",
"http://waimai.meituan.com/home/ws10m19qgq7h",
"http://waimai.meituan.com/home/ws1079s3ek0m",
"http://waimai.meituan.com/home/ws0cq7hwhebm",
"http://waimai.meituan.com/home/ws10hyydu2f0",
"http://waimai.meituan.com/home/ws06vy2w07yr"

]
if __name__ == '__main__':
    # Launch one worker process per listing page.
    # NOTE(review): indentation was lost in the original; the 10-second
    # pause is assumed to sit inside the loop, spacing out the launches.
    for page_url in urllist:
        worker = Process(target=paiming, args=(page_url,))
        worker.start()
        time.sleep(10)


操作数据库的脚本:
#!/usr/bin/env python
# encoding: utf-8

"""
@version: ??
@author: phpergao
@license: Apache Licence
@file: SQL.py
@time: 2016/7/25 17:56
"""

import pymssql
class MSSQL:
    """
    Thin convenience wrapper around pymssql.

    pymssql builds can be downloaded from
    http://www.lfd.uci.edu/~gohlke/pythonlibs/#pymssql
    The TCP/IP protocol has to be enabled in Sql Server Configuration
    Manager for the connection to work.

    Every call opens a fresh connection and closes it when done.
    """

    def __init__(self, host, user, pwd, db):
        """Store the connection settings; no connection is opened here."""
        self.host = host
        self.user = user
        self.pwd = pwd
        self.db = db

    def __GetConnect(self):
        """
        Open a connection (kept in self.conn) and return a cursor on it.

        Raises NameError when no database name is configured or when no
        cursor could be obtained.
        """
        if not self.db:
            # `raise(NameError, "...")` would raise a tuple, which Python 3
            # rejects with TypeError; raise a real exception instance.
            raise NameError("没有设置数据库信息")
        self.conn = pymssql.connect(host=self.host, user=self.user, password=self.pwd, database=self.db, charset="utf8")
        cur = self.conn.cursor()
        if not cur:
            # Do not leave the freshly opened connection dangling.
            self.conn.close()
            raise NameError("连接数据库失败")
        return cur

    def ExecQuery(self, sql, params=None):
        """
        Run a query and return all rows.

        The result is a list of tuples: one tuple per row, one element per
        column. `params`, when given, is passed to the cursor as query
        parameters instead of formatting values into `sql`.

        Example:
            ms = MSSQL(host="localhost",user="sa",pwd="123456",db="PythonWeiboStatistics")
            resList = ms.ExecQuery("SELECT id,NickName FROM WeiBoUser")
            for (id,NickName) in resList:
                print(str(id),NickName)
        """
        cur = self.__GetConnect()
        try:
            if params is None:
                cur.execute(sql)
            else:
                cur.execute(sql, params)
            return cur.fetchall()
        finally:
            # Always release the connection, even when the query fails.
            self.conn.close()

    def ExecNonQuery(self, sql, params=None):
        """
        Run a statement that returns no rows and commit it.

        `params`, when given, is passed to the cursor as query parameters
        instead of formatting values into `sql`.
        """
        cur = self.__GetConnect()
        try:
            if params is None:
                cur.execute(sql)
            else:
                cur.execute(sql, params)
            self.conn.commit()
        finally:
            # Always release the connection, even when the statement fails.
            self.conn.close()

def main(ip,username,password,dbname,cmd):
    """
    Run `cmd` as a query on the given server/database and print each row.

    ip, username, password and dbname are forwarded to MSSQL as host,
    user, pwd and db.
    """
    connection = MSSQL(host=ip, user=username, pwd=password, db=dbname)
    rows = connection.ExecQuery(cmd)
    for row in rows:
        print(row)


if __name__ == '__main__':
    # Nothing to do when the module is run directly.
    pass
前端PHP网页脚本:

<?php
// Authentication guard. It has to run before any markup is sent:
// session_start() and header() both need to emit HTTP headers, which is
// no longer possible once output has started.
include "config.php";
session_start();
if (!isset($_SESSION['username'])) {
    $home_url = 'logIn.php';
    header('Location:'.$home_url);
    // Stop here; otherwise the rest of the page would still be rendered
    // and sent to a visitor who is not logged in.
    exit;
}
$ms=0;
$mt=0;
$ds=0;
$dt=0;

?>
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>主页</title>

<script src="js/jquery-1.9.1.min.js" type="text/javascript"></script>
<script src="js/bootstrap-select.js" type="text/javascript"></script>
<script type="text/javascript" src="grid.js"></script>
<link id="bs-css" href="css/bootstrap-cerulean.min.css" rel="stylesheet">
<link href="css/charisma-app.css" rel="stylesheet">
<link href="css/bootstrap-select.css" rel="stylesheet">
<link href='bower_components/fullcalendar/dist/fullcalendar.css' rel='stylesheet'>
<link href='bower_components/fullcalendar/dist/fullcalendar.print.css' rel='stylesheet' media='print'>
<link href='bower_components/chosen/chosen.min.css' rel='stylesheet'>
<link href='bower_components/colorbox/example3/colorbox.css' rel='stylesheet'>
<link href='bower_components/responsive-tables/responsive-tables.css' rel='stylesheet'>

<link href='bower_components/bootstrap-tour/build/css/bootstrap-tour.min.css' rel='stylesheet'>
<link href='css/jquery.noty.css' rel='stylesheet'>
<link href='css/noty_theme_default.css' rel='stylesheet'>
<link href='css/elfinder.min.css' rel='stylesheet'>
<link href='css/elfinder.theme.css' rel='stylesheet'>
<link href='css/jquery.iphone.toggle.css' rel='stylesheet'>
<link href='css/uploadify.css' rel='stylesheet'>
<link href='css/animate.min.css' rel='stylesheet'>

<link href="grid.css" type="text/css" rel="stylesheet">
<script type="text/javascript" src="laydate/laydate.js"></script>
<script type="text/javascript" src="bower_components/responsive-tables/responsive-tables.js"></script>
<script type="text/javascript"> laydate.skin('danlan');</script>
<script src="js/jquery.noty.js"></script>

</head>
<body>
<!-- topbar starts -->
<?php include 'dropdownmenu.php' ?>
<!-- topbar ends -->
<div class="ch-container" style="position:relative;top:80px">
<div class="row">

<?php include 'navi.php'; ?>

<div id="content" class="col-lg-10 col-sm-10">
<!-- content starts -->
<div id="dlayerreport">

</div>

<div class="row">
<div class="box col-md-12">
<div class="box-inner">
<div class="box-header well">
<h2><i class="glyphicon glyphicon-info-sign"></i>美团店铺排名</h2>
<div class="box-icon">
<a href="orderlist.php" class="btn btn-minimize btn-round btn-default"><i class="glyphicon glyphicon-chevron-up"></i></a>
</div>
</div>
<div class="box-content row">
<div class="col-lg-7 col-md-12" style="width:100%;">
<div id="wepaydailysales" class="box-content" style="width:100%;">
<form id="grid_form_id">
<table class="table table-striped table-bordered bootstrap-datatable datatable responsive">
<thead><tr>
<!--<th class="th1" style="width:5%;">排序</th>-->
<th class="th1" style="width:15%;">店铺</th>
<th class="th2"  style="width:15%;">排名</th>
<th class="th5"  style="width:25%;">更新时间</th>
<th class="th4"  style="width:25%;"> 定位地址</th>

</tr></thead>
<?php
// Emit one table row per record of dbo.meituan_paiming, ordered by rank.
// Column order matches the table header: store, rank, update time, address.

// NOTE(review): credentials are hard-coded; consider moving them into config.php.
$serverName = "localhost";
$connectionInfo = array(  "UID"=>"stdservice", "PWD"=>"数据库密码","Database"=>"STDdata");
$conn = sqlsrv_connect( $serverName, $connectionInfo);

// Convert a text column from GBK to UTF-8 and HTML-escape it. The values
// come from a scraped third-party page, so they must not be echoed raw.
$cell = function ($value) {
    $text = iconv("gbk//ignore", "utf-8", $value);
    return '<td>'.htmlspecialchars((string)$text, ENT_QUOTES, 'UTF-8').'</td>';
};

if ($conn === false) {
    // Connection failed: log the driver errors and render no rows.
    error_log(print_r(sqlsrv_errors(), true));
} else {
    $queryString = "SELECT * FROM dbo.meituan_paiming ORDER BY paiming aSC";
    $result = sqlsrv_query($conn,$queryString);
    if ($result !== false) {
        while ($row = sqlsrv_fetch_array( $result,SQLSRV_FETCH_ASSOC)) {
            // Open the row explicitly; previously only </tr> was emitted.
            echo '<tr>';
            echo $cell($row['store_name']);
            echo '<td>'.htmlspecialchars((string)$row['paiming'], ENT_QUOTES, 'UTF-8').'</td>';
            echo $cell($row['updatetime']);
            echo $cell($row['dingwei_address']);
            echo '</tr>';
        }
        sqlsrv_free_stmt($result);
    } else {
        error_log(print_r(sqlsrv_errors(), true));
    }
    sqlsrv_close($conn);
}

?>

<script language="javascript">
// Fill the edit dialog: put the item id into the hidden #itemid input and
// the item name into the #itemname heading.
function show(itemid, itemname) {
    var idInput = $("#itemid");
    var nameHeading = $("#itemname");
    idInput.attr("value", itemid);
    nameHeading.html(itemname);
}
</script>

</table></form></div>
</div>
</div>
</div>
</div>
</div>

</div>

<div class="modal fade" id="menu" tabindex="-1" role="dialog" aria-labelledby="myModalLabel" aria-hidden="true" style="display: none;">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal">×</button>
<h3 id="itemname"></h3>
</div>
<div class="modal-body">
<form class="form-horizontal" action="action_updateimage.php" method="post"  enctype="multipart/form-data" >
<fieldset>
<div class="input-group input-group-lg">

<div style="display:none;" class="col-sm-8"><input type="text" name="itemid" id="itemid" class="form-control" readonly ="readonly"></div>
</div>

<div class="input-group input-group-lg">
<span class="input-group-addon"><i class="glyphicon glyphicon-folder-open red"></i></span>
<input type="file" name="FileUpload1" id="FileUpload1"  class="btn btn-primary green" />
</div>

</div>
<div  class="clearfix"></div><br>
<p style="text-align:center"> <button type="submit" name="submit" class="btn btn-primary" style="text-align:center">确认</button></p>
</form>
<div class="modal-footer">
<a href="rider" class="btn btn-default" data-dismiss="modal">关闭</a>
</div>
</div>
</div>
</div>

<?php include 'userprof.php'; ?>
<?php include 'footer.php'; ?>
</div>
</body>
</html>


最后执行完成后在前端显示的效果图:



内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  python version localhost