您的位置:首页 > 编程语言 > Python开发

[Python]_[批量下载网站文件]

2013-07-15 23:52 543 查看
场景:

1.有时候需要下载某个网站上提供的所有pdf文件,貌似没发现有下载工具提供这个功能。

#! encoding=utf-8

import urllib2
import re
import os

def Download(url, output):
    """Fetch `url` and write the response body to the file `output`.

    The body is copied in fixed-size chunks so a large file is never held
    in memory as a whole, and both the HTTP response and the output file
    are closed even when reading or writing fails.

    Args:
        url: Address passed to ``urllib2.urlopen``.
        output: Path of the file to create (opened in binary write mode,
            so an existing file is overwritten).

    Raises:
        Whatever ``urllib2.urlopen`` or ``open`` raise; nothing is
        swallowed here.
    """
    print("downloading..." + url)
    response = urllib2.urlopen(url)
    try:
        with open(output, "wb") as resourceFile:
            while True:
                chunk = response.read(64 * 1024)
                if not chunk:
                    # An empty read marks the end of the response body.
                    break
                resourceFile.write(chunk)
    finally:
        # The response object is not closed by garbage collection in a
        # timely way; release the connection explicitly.
        response.close()
    print("downloaded")

def Action(url, ext="pdf", output="."):
    """Download every double-quoted link on page `url` that ends with `ext`.

    The page is fetched, scanned with a regular expression for quoted
    strings ending in `ext`, and each match is handed to ``Download``.

    * Links that name their own host (``http://``, ``https://`` or
      protocol-relative ``//host/...``) are saved directly inside `output`
      under their last path component.
    * All other links are resolved against `url` and saved under `output`
      at the same relative path, creating sub-directories as needed.
      Links whose path would climb out of `output` (``../``) are skipped.

    Args:
        url: Address of the page to scan.
        ext: Suffix a quoted string must end with to be downloaded. It is
            matched literally (regex metacharacters are escaped).
        output: Directory that receives the downloaded files.

    Raises:
        Whatever ``urllib2`` raises for the page or any file, and
        ``OSError`` if a target directory cannot be created.
    """
    # Python 2 stdlib module, imported locally so this function does not
    # depend on a change to the file-level import list.
    import urlparse

    # 1. domain: directory part of the page URL. Logged only; the actual
    # link resolution below is done by urljoin, which also handles page
    # URLs without a path and root-relative links.
    domain = url[0:url.rfind("/") + 1]
    print(domain)

    # 2. content
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        content = response.read()
    finally:
        response.close()

    # 3. resource: any double-quoted string that ends with `ext`
    pattern = re.compile('"([^"]+' + re.escape(ext) + ')"')
    links = pattern.findall(content)
    print("file num: " + str(len(links)))

    # 4. download
    for link in links:
        fileUrl = urlparse.urljoin(url, link)

        if urlparse.urlparse(link).netloc:
            # Link carries its own host: keep only the file name locally.
            partIndex = link.rfind("/")
            print(link)
            print(".........")
            print(link[partIndex:])
            fileOutput = os.path.join(output, link[partIndex + 1:])
        else:
            # The link text comes from a remote page, so normalise it and
            # refuse anything that would be written outside `output`.
            relative = os.path.normpath(link.lstrip("/"))
            if relative == os.pardir or relative.startswith(os.pardir + os.sep):
                print("skipping (escapes output directory): " + link)
                continue
            fileOutput = os.path.join(output, relative)
            print(fileUrl)

        targetDir = os.path.dirname(fileOutput)
        if targetDir:
            try:
                os.makedirs(targetDir)
            except OSError:
                # An already existing directory is fine; anything else
                # (permissions, a file in the way) is a real error.
                if not os.path.isdir(targetDir):
                    raise

        print(fileOutput)
        Download(fileUrl, fileOutput)

if __name__ == '__main__':
    # Example run: scan http://tech.qq.com/ for quoted links ending in "jpg"
    # and save them below the current directory (Action's default output).
    #
    # The original script also assigned this URL to a variable that was
    # never used; it is kept here as an alternative target:
    #   Action("http://compgeom.cs.uiuc.edu/~jeffe/teaching/algorithms/")
    print("download")
    Action("http://tech.qq.com/", "jpg")
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: