您的位置:首页 > 编程语言 > Python开发

Python 批量下载xkcd漫画

2015-12-20 11:48 726 查看
#coding=utf-8
import urllib
import re

# Number of the first comic to download (inclusive).
start = 1
# Number of the last comic to download (inclusive).
end = 1613

# Base URL; the comic number and a trailing '/' are appended to build each page URL.
prevUrl = 'http://xkcd.com/'

def getHtml(url):
    """Fetch *url* and return the raw response body.

    The response object is always closed, even when reading fails, so
    that a long batch run does not accumulate open connections.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        page.close()

def getImgUrl(html):
    """Extract comic image URLs from the text of a comic page.

    An image is recognised as a ``src="..."`` attribute that is
    immediately followed by a ``title=`` attribute. PNG images take
    precedence: JPG images are only looked for when no PNG matched.

    Returns the list of matched ``src`` values (empty when none match).
    """
    patterns = (
        r'src="(.+?\.png)" title=',
        r'src="(.+?\.jpg)" title=',
    )
    found = []
    for pattern in patterns:
        found = re.findall(pattern, html)
        if found:
            # Stop at the first extension that yields any match.
            break
    return found

def getImg(url, name):
    """Download the resource at *url* and write its bytes to the file *name*.

    The file is opened in binary mode and overwritten if it exists. Both
    the network connection and the file handle are released even when
    reading or writing raises.
    """
    conn = urllib.urlopen(url)
    try:
        with open(name, 'wb') as f:
            f.write(conn.read())
    finally:
        conn.close()

def loopPrintUrl(imglist):
    """Debug helper: print every entry of *imglist* prefixed with ``http:``."""
    for path in imglist:
        print ('http:' + path)

def getImgFileNameFromUrl(url):
    """Return the file name of an image URL: the text after its last '/'.

    Taking the last path component, rather than a fixed position in the
    split URL, keeps this working for URLs with fewer or more path
    segments than ``//host/dir/file``.
    """
    return url.rsplit('/', 1)[-1]

def loopDownLoadXKCDImg():
    """Download the images of every comic numbered ``start`` through ``end``.

    For each comic number the page ``prevUrl + number + '/'`` is fetched
    and scanned for image URLs. Each image is saved in the current
    directory as ``<number>_<image file name>`` and a progress line is
    printed after it has been written.
    """
    for number in range(start, end + 1):
        label = str(number)
        pageHtml = getHtml(prevUrl + label + '/')
        for src in getImgUrl(pageHtml):
            target = label + "_" + getImgFileNameFromUrl(src)
            # The scraped src values have no scheme, so one is prepended.
            fullUrl = "http:" + src
            getImg(fullUrl, target)
            print (label + "    " + fullUrl + " -> down")

# Start the batch download as soon as this file is executed.
loopDownLoadXKCDImg()


start 是起始漫画索引

end 是结束漫画的索引(此脚本写完的时候 xkcd 最新的是第 1613 张)

以上代码在 Python 2.7 下测试通过(使用了 Python 2 的 urllib.urlopen;在 Python 3 中该函数位于 urllib.request)。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: