# 您的位置:首页 > 编程语言 > Python开发  (blog navigation: Home > Programming > Python)
# python下载文件DEMO  (Python file-download demo)
# Posted 2014-07-18 19:59, 253 views
#coding=utf-8

'''

Created on 2013-7-17

@author: zinan.zhang

'''

import re

import time

import httplib2

import urllib

from bs4 import BeautifulSoup

#路径

savePath = 'F://TDDOWNLOAD//aNO.4//'

#获取url

def url_xunhuan(url,list):

return url + list

#下载图片的时候

time.sleep(0.5)#先sleep,再读取数据

"""根据url下载文件,文件名自动从url获取"""

def gDownload(url,savePath):

#参数检查,现忽略

fileName = gGetFileName(url)

#fileName =gRandFilename('jpg')

gDownloadWithFilename(url,savePath,fileName)

"""根据url获取文件名"""

def gGetFileName(url):

if url==None: return None

if url=="" : return ""

arr=url.split("/")

return arr[len(arr)-1]

"""根据url下载文件,文件名参数指定"""

def gDownloadWithFilename(url,savePath,file):

#参数检查,现忽略

try:

urlopen=urllib.URLopener()

fp = urlopen.open(url)

data = fp.read()

fp.close()

file=open(savePath + file,'w+b')

file.write(data)

print "下载成功:"+ url

file.close()

except IOError:

print "下载失败:"+ url

#初始化页面,提取必要信息

def getPage(url):

userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'

headers = {

'User-Agent': userAgent,

'Accept-Language': 'zh-CN,zh;q=0.8',

'Accept': 'text/css,*/*;q=0.1',

}

http = httplib2.Http(".cache")

_, content = http.request(url, 'GET',headers=headers)

return content

#循环下载列表固定的 ---就是wallpaper,enterdesk等网站

def xuanhuan_down_list():

list=[]

url = 'http://tupian.enterdesk.com/2013/mxy/0311/4/'

temp=10

for i in range(temp):

list.append(str(i)+'.jpg')

for i in range(temp):

url_list = url_xunhuan(url,list[i])

gDownload(url_list,savePath)

time.sleep(0.2)

#爬网获取url

def spider_url(url):

page = getPage(url)

dom = BeautifulSoup(page)

srcs = [x['src'] for x in dom.findAll('img')]

#成功获取url

return srcs[0]

#循环下载列表随机的 ---就是ZOL桌面壁纸下载

def xuanhuan_down_suiji():

try:

temp=25

i=88

j=i

while (i <= j+temp):

#http://desk.zol.com.cn/showpic/1920x1200_30688_33.html

url = 'http://desk.zol.com.cn/showpic/1920x1200_12'+str(i)+'_37.html'

src_url = spider_url(url)

gDownload(src_url,savePath)

time.sleep(0.1)

i+=1

except IOError:

print "url获取失败!"

if __name__ == "__main__":

#gDownload(url,savePath)

'''

#批量下载序号固定的图片

xuanhuan_down_list()

'''

'''

#批量下载隐藏jpg路径的文件

xuanhuan_down_suiji()

'''

#批量下载文件

# Reposted from: http://www.cnblogs.com/dyllove98/archive/2013/07/19/3201162.html
# 内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理
# (Content aggregated from user submissions; accuracy not guaranteed.)