您的位置:首页 > 运维架构 > 网站架构

批量下载matplotlib网站实例源码python脚本

2017-08-14 09:51 881 查看
模块功能描述:

该模块是为了批量下载matplotlib网站实例源码而设计。

getUrlList():获取每个实例页面的 url 列表。

GetDemoDownload():下载每个实例的源码,包括 .py 与 .ipynb 文件。

#coding=utf8
'''
作者:ewang
日期:2017/8/14
模块功能描述:
该模块是为了批量下载matplotlib网站实例源码而设计。
getUrlList():函数是为了获取每个实例的url列表
GetDemoDownload():下载每个实例源码包括py与ipynb文件

'''
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import os
import urllib2,re

def PATH(p):
    """Return the absolute form of *p* resolved against this script's folder."""
    script_dir = os.path.dirname(__file__)
    joined = os.path.join(script_dir, p)
    return os.path.abspath(joined)
class downMatplotlibDemo(object):
    """Batch-download the example sources linked from the matplotlib docs.

    Instantiating the class runs the whole job: it collects the gallery page
    URLs, starts Chrome through Selenium, saves every file reachable through
    a "Download" link on those pages into ``./sourceCode/`` (resolved by the
    module-level ``PATH`` helper) and finally shuts the browser down.

    All messages use the single-argument ``print(...)`` form so the code
    parses the same way as a print statement and as a print function.
    """

    # Page that is scanned for links into the gallery.
    INDEX_URL = "http://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.subplots.html#matplotlib.pyplot.subplots"
    # Prefix prepended to every relative gallery path found on INDEX_URL.
    GALLERY_URL = "http://matplotlib.org/devdocs/gallery/"
    # Group 1 is the path relative to the gallery, group 2 the link caption.
    LINK_PATTERN = 'class="reference internal" href="../../gallery/(.*?)"><span class="std std-ref">(.*?)</span></a>'

    def __init__(self):
        """Collect the page URLs, then drive the browser through all of them."""
        self.urlList = self.getUrlList()
        self.driver = webdriver.Chrome()
        try:
            self.driver.maximize_window()
            self.GetDemoDownload()
        finally:
            # quit() rather than close(): it also ends the driver session,
            # and the finally makes sure that happens when a page fails.
            self.driver.quit()

    def getUrlList(self):
        """Return the absolute URLs of the gallery pages linked from INDEX_URL.

        Returns:
            A list of URL strings. The list is empty (never ``None``) when
            the page cannot be fetched, is empty, or contains no gallery
            links, so callers can always iterate over the result.
        """
        try:
            pageContent = urllib2.urlopen(self.INDEX_URL).read()
            return self._parseGalleryLinks(pageContent)
        except Exception as e:
            # Best effort, as before: report the problem and carry on.
            print("Create UrlList Error: %s" % (e,))
            return []

    @classmethod
    def _parseGalleryLinks(cls, pageContent):
        """Extract gallery links from *pageContent* and make them absolute."""
        if not pageContent:
            return []
        matches = re.findall(cls.LINK_PATTERN, pageContent, re.S)
        return [cls.GALLERY_URL + relPath for relPath, _caption in matches]

    def GetDemoDownload(self):
        """Visit every URL in ``self.urlList`` and save its "Download" files.

        After each page a progress line with the running page count and the
        page URL is printed. Failures on one page or one file are reported
        and do not stop the remaining work.
        """
        count = 0
        for url in self.urlList or []:
            self.driver.get(url)
            # Scroll far down so the links at the bottom of the page load.
            self.driver.execute_script("var q=document.body.scrollTop=200000")
            try:
                # until() keeps polling while the returned list is empty and
                # raises once the 5 second timeout is reached.
                downLoadBtnList = WebDriverWait(self.driver, 5).until(
                    lambda driver: driver.find_elements(
                        "partial link text", 'Download'))
            except Exception as e:
                print("Download not exist: %s" % (e,))
                # Reset explicitly so links of the previous page are never
                # processed again for this one.
                downLoadBtnList = []

            for downLoad in downLoadBtnList:
                # One try per link: a broken file must not skip its siblings.
                try:
                    self._saveDownload(downLoad.get_attribute("href"))
                except Exception as e:
                    print("Download List: %s" % (e,))

            count += 1
            # Report the page URL; a per-file URL may not exist for a page.
            print("%d \t url= %s" % (count, url))

    def _saveDownload(self, downurl):
        """Download *downurl* into ``./sourceCode/`` unless already present."""
        if not downurl:
            print("the download url is null!")
            return
        fileName = downurl.split("/")[-1]
        if not fileName:
            print("The file name is null!")
            return

        filePath = PATH('./sourceCode/')
        if not os.path.exists(filePath):
            os.mkdir(filePath)
        # os.path.join instead of a literal backslash keeps this portable.
        fileWithPath = os.path.join(filePath, fileName)
        if os.path.exists(fileWithPath):
            print("the file with path is exists....")
            return

        # Fetch first, write second: a failed download then leaves no empty
        # file behind that later runs would mistake for a finished one.
        pageConet = urllib2.urlopen(downurl).read()
        with open(fileWithPath, "wb") as FH:
            FH.write(pageConet)

# Script entry point: creating the object runs the complete download job.
if __name__ == "__main__":
    downMatplotlibDemo()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: