python3.6+BeautifulSoup4 爬取360手机助手app应用的信息并存储数据库 批量下载apk
2018-02-01 20:52
686 查看
源码:
#!/usr/bin/python
# encoding: utf-8
'''
Created on 2018-01-12

Crawl app listings from the 360 mobile assistant (zhushou.360.cn),
store each app's metadata in a MySQL database, and (optionally)
download the apk packages in bulk.

@author: xianqingchen
'''
import os
from urllib.request import urlopen

import requests
from bs4 import BeautifulSoup
import pymysql


def GetAppinfo(urlhead, page):
    """Scrape every app category linked from *urlhead*.

    For each category, up to *page* list pages are visited and one row
    per app is inserted into the AppInfo table.

    urlhead -- category index url, e.g. 'http://zhushou.360.cn/list/index/cid/1/'
    page    -- number of list pages to crawl per category
    Returns None; results go to the database.
    """
    # Pretend to be a desktop browser so the site serves the full page.
    head = {
        'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) '
                       'Gecko/20100101 Firefox/52.0'),
    }
    try:
        appinfo_html = requests.get(url=urlhead, headers=head)
        appinfo_xml = BeautifulSoup(appinfo_html.text, 'lxml')
        # <ul class="select"> holds the category navigation links.
        subclass_uls = appinfo_xml.find_all('ul', {'class': 'select'})
        subclass_links = subclass_uls[0].find_all('a')
    except (requests.RequestException, IndexError) as exc:
        # Nothing can be scraped without the category list: return here
        # instead of falling through with undefined variables (the
        # original continued and crashed with NameError).
        print("父类标签页面,出现异常,终止", exc)
        return

    connect = pymysql.Connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='cecgw',
        db='app',
        charset='utf8',
    )
    cursor = connect.cursor()
    try:
        for link in subclass_links:
            href = link.get('href')
            # Only category links contain /list/index/cid;
            # cid/1 is the "all apps" pseudo-category and is skipped.
            if href is None or '/list/index/cid' not in href:
                continue
            if href == '/list/index/cid/1/':
                continue
            appsubclassname = link.get_text()
            # Loop variable renamed: the original reused the name `page`,
            # shadowing the parameter.
            for pageno in range(1, page + 1):
                list_url = 'http://zhushou.360.cn' + href + '?page=' + str(pageno)
                try:
                    list_html = requests.get(url=list_url, headers=head)
                    list_xml = BeautifulSoup(list_html.text, 'lxml')
                    # The original passed the *set* {'class', 'iconList'},
                    # which does not filter on the class attribute at all;
                    # a dict is the documented attrs form.
                    app_ul = list_xml.find_all('ul', {'class': 'iconList'})
                    app_items = app_ul[0].find_all('li')
                except (requests.RequestException, IndexError) as exc:
                    print("appsubcalss exception", list_url, exc)
                    continue
                for item in app_items:
                    title = item.find_all('h3')[0]
                    app_name = title.get_text()
                    apphref = title.find_all('a')[0].get('href')
                    # Detail page of the app.
                    appurl = 'http://zhushou.360.cn' + apphref
                    for a_tag in item.find_all('a'):
                        classes = a_tag.get('class')
                        # The download anchor carries three classes, the
                        # third of which contains 'normal'.
                        if classes is None or len(classes) != 3:
                            continue
                        if 'normal' not in classes[2]:
                            continue
                        try:
                            # Real apk url is in the 'url=' query parameter.
                            app_loadurl = a_tag.get('href').split('url=')[1]
                            # appDownload(app_loadurl)
                            detail_html = requests.get(url=appurl, headers=head)
                            detail_xml = BeautifulSoup(detail_html.text, 'lxml')
                            pf = detail_xml.find_all('div', {'class': 'pf'})[0]
                            breif = detail_xml.find_all('div', {'class': 'breif'})[0]
                            spans = pf.find_all('span')
                            tds = breif.find_all('td')
                            # The labels use the full-width colon ':'.
                            appscore = spans[0].get_text()
                            appscounts = spans[3].get_text().split(':')[1]
                            appsize = spans[4].get_text()
                            appauthor = tds[0].get_text().split(':')[1]
                            appdate = tds[1].get_text().split(':')[1]
                            appver = tds[2].get_text().split(':')[1]
                            appsyst = tds[3].get_text().split(':')[1]
                            applan = tds[4].get_text().split(':')[1]
                        except (requests.RequestException, IndexError) as exc:
                            # Skip this app: reading the fields outside the
                            # success path (as the original did) would use
                            # undefined or stale values.
                            print("appdeinfo exception", appurl, exc)
                            continue
                        data = (appsubclassname, app_name, appscore,
                                appscounts, appsize, appauthor, appdate,
                                appver, appsyst, applan, appurl, app_loadurl)
                        try:
                            # Parameterized query: the original interpolated
                            # quoted values with %, which breaks on any
                            # value containing a quote and is open to SQL
                            # injection.
                            sql = ("INSERT INTO AppInfo(subclass, appname, "
                                   "score, counts, size, author, update1, "
                                   "version, supsystem, language1, appurl, "
                                   "loadurl) VALUES (%s, %s, %s, %s, %s, %s, "
                                   "%s, %s, %s, %s, %s, %s)")
                            cursor.execute(sql, data)
                            connect.commit()
                        except pymysql.MySQLError:
                            print("数据库存储异常", data)
                        # One download link per app is enough.
                        break
                    # NOTE(review): the original also broke out of the app
                    # loop here, so only the first app of each list page is
                    # processed — looks like a debug limiter; kept for
                    # behavior parity, confirm before a full crawl.
                    break
    finally:
        # Always release the DB resources, even if scraping raised.
        cursor.close()
        connect.close()


def appDownload(url):
    """Download one apk from *url* into ../pak/, streamed in 8 KiB chunks."""
    file_name = url.split('/')[-1].strip()
    path = os.path.join(os.path.abspath(".."), 'pak')
    # The original assumed the directory already existed.
    os.makedirs(path, exist_ok=True)
    u = urlopen(url)
    try:
        # `with` guarantees the file is closed even on a partial download.
        with open(os.path.join(path, file_name), 'wb') as f:
            while True:
                buffer = u.read(8192)
                if not buffer:
                    break
                f.write(buffer)
    finally:
        u.close()
    print("Sucessful to download" + " " + file_name)


if __name__ == '__main__':
    url = 'http://zhushou.360.cn/list/index/cid/1/'
    page = 1
    GetAppinfo(url, page)
下载结果:
相关文章推荐
- python3.6+BeautifulSoup4.2 爬取各类app应用信息并下载app包
- python爬虫入门-爬取百度应用市场apk信息并下载
- python批量下载色影无忌和蜂鸟的图片 爬虫小应用
- Python批量下载网页中的表格存储到CSV文件中
- TensorFlow 1.2.0新版本完美支持Python3.6,windows在cmd中输入pip install tensorflow就能下载应用最新tensorflow
- 世纪佳缘信息爬取存储到mysql,下载图片到本地,从数据库选取账号对其发送消息更新发信状态
- 安装MySql+连接数据库+读取数据并存储成dataframe(python3.6)
- 使用python编写脚本获取手机当前应用apk的信息
- [python爬虫] 招聘信息定时系统 (一).BeautifulSoup爬取信息并存储MySQL
- python批量获取apk软件详细信息的实现
- 如何从google play下载app应用,直接下载apk
- WP8__从windowsphone app store 中根据app id获取应用的相关信息(下载网址及图片id等)
- 如何从google play下载app应用,直接下载apk
- 世纪佳缘信息爬取存储到mysql,下载图片到本地,从数据库选取账号对其发送消息更新发信状态
- Python抓取网页&批量下载文件方法初探(正则表达式+BeautifulSoup) (转)
- 使用python编写脚本获取手机当前应用apk的信息
- Python抓取网页&批量下载文件方法初探(正则表达式+BeautifulSoup)
- 世纪佳缘信息爬取存储到mysql,下载图片到本地,从数据库选取账号对其发送消息更新发信状态
- Python批量下载apk
- python网络爬虫与信息采取之下载存储数据(一)-----下载储存媒体文件模板