您的位置：首页 > 编程语言 > Python开发

python实现简单抓图并打包成exe

2015-05-28 23:43 585 查看

打包：

在CMD命令行运行:python setup.py py2exe

注意:

1.需要安装py2exe

2.修改setup.py中的文件名，使其指向需要打包的py文件

3.打包后，包括exe文件在内的所有必需文件都在dist目录下

4.在命令行运行时，应先用cd切换到py文件所在的目录

setup.py:

#!/usr/bin/env python
# coding=utf-8
"""py2exe build script; run it as ``python setup.py py2exe``."""

from distutils.core import setup

# Not referenced by name below; per the py2exe documentation, importing it is
# what makes the "py2exe" command available to setup().
import py2exe

# Script to bundle as a console executable.  Edit this to package another file.
# (Named SCRIPT rather than "file" so the Python 2 builtin is not shadowed.)
SCRIPT = "python_oschina_picture_new.py"

setup(console=[SCRIPT])

python代码：(python_oschina_picture_new.py)

#!/usr/bin/env python

#coding=utf-8

import urllib

import urllib2

import os

import re

import sys

BaseURL='http://www.mzitu.com/' # site home page; module names and gallery ids are appended to it

Module=['model'] # modules to download

#显示下载进度

def schedule(a,b,c):
    """Print download progress.

    Takes the three arguments that urllib.urlretrieve passes to its report hook.

    a: number of data blocks downloaded so far
    b: size of one data block
    c: size of the remote file; zero or negative means it is not usable

    Prints the progress as a percentage capped at 100.  When c is zero or
    negative no percentage can be computed, so the amount transferred so far
    is printed instead.
    """
    if c <= 0:
        # Dividing by c here would raise ZeroDivisionError (c == 0) or print a
        # meaningless negative percentage (c < 0).
        print('%d bytes' % (a * b))
        return
    per = min(100.0 * a * b / c, 100)
    print('%.2f%%' % per)

def GetHtml(URL):
    """Fetch URL and return the response body exactly as read (not decoded).

    URL: address to open with urllib.urlopen
    """
    # To send a custom User-agent instead, build a urllib2.Request, call
    # add_header('User-agent', ...) on it and open it with urllib2.urlopen.
    response = urllib.urlopen(URL)
    try:
        return response.read()
    finally:
        # Release the connection even when read() fails.
        response.close()

#获取资源页数

def GetPage(URL,content,flag):
    """Return the last page number found in content, as a string of digits.

    URL: address the pagination links are built on
    content: HTML text to search
    flag: 0 to read the links of a listing page, e.g.
              <a class='page-numbers' href='http://www.mzitu.com/model/page/47'><span>47</span></a>
          1 to read the link that follows the "dots" marker of a gallery page, e.g.
              <span class='dots'>...</span><a href='http://www.mzitu.com/41633/30'>

    Returns the number captured by the last match.  When nothing matches,
    returns '1' so that a caller looping over range(1, int(result) + 1) still
    processes the one page it already has instead of crashing.

    Raises ValueError when flag is neither 0 nor 1.
    """
    # Escape the URL so that characters such as '.' only match themselves.
    base = re.escape(URL)
    if flag == 0:
        regex = base + r"/page/(\d+)\s*'\s*>\s*<span>\s*\d*\s*</span>"
    elif flag == 1:
        regex = (r"span\s*class\s*=\s*'dots'.*?</span>\s*<\s*a\s*href\s*=\s*'\s*"
                 + base + r"/(\d+)'>")
    else:
        raise ValueError('flag must be 0 or 1, got %r' % (flag,))
    # re.S lets '.' match newlines, so one match may span several lines.
    res = re.findall(regex, content, re.S)
    if not res:
        return '1'
    return res[-1]

#获取列表

def GetList(content):
    """Return the galleries linked from the HTML of a listing page.

    content: HTML text to search

    Returns a list of (gallery id, title) string tuples, one for each link of
    the form
        "http://www.mzitu.com/41107" title="..." target="_blank">...</a></h2>
    The id must contain at least one digit, so a link to BaseURL itself is
    never reported as a gallery.
    """
    # BaseURL is escaped so that its dots only match literal dots.
    regex = (re.escape(BaseURL)
             + r'(\d+)"\s*title\s*=\s*"(.*?)"\s*target\s*=\s*"_blank\s*">.*?</a>\s*</h2>')
    # re.S lets '.' match newlines, so one match may span several lines.
    return re.findall(regex, content, re.S)

#下载资源

def Download(URL,num,dirName):
    """Download picture number num of the gallery at URL into directory dirName.

    URL: address of the gallery; picture 1 is read from URL itself and
         picture n from URL + '/' + n
    num: picture number (a string of digits or an int)
    dirName: target directory, created when missing; a byte string is decoded
             as UTF-8, text is used as given

    The picture is stored as <dirName>/<num>.jpg.  Nothing is fetched when that
    file already exists.  Failures are reported on stdout and never raised; a
    file left incomplete by a failed transfer is removed so that the next run
    retries it instead of skipping it.
    """
    if isinstance(dirName, bytes):
        Dir = dirName.decode('utf-8')  # needed for non-ASCII directory names
    else:
        Dir = dirName
    num = str(num)
    picture = '%s/%s.jpg' % (Dir, num)
    failure = 'exception! when download %s %s.jpg' % (Dir, num)
    started = False
    try:
        if not os.path.exists(Dir):
            os.makedirs(Dir)
        if os.path.exists(picture):
            return
        # Matches e.g.
        # <a href="http://www.mzitu.com/41633/3" ><img src="http://.../27t02.jpg" alt="..." />
        regex = (r'href\s*=\s*"' + re.escape(URL)
                 + r'/{0,1}\d*".*?<\s*img\s*src\s*=\s*"(.*?)"\s*alt')
        if int(num) == 1:
            htmlURL = URL
        else:
            htmlURL = URL + '/' + num
        found = re.findall(regex, GetHtml(htmlURL), re.S)
        if not found:
            # The page holds no picture link in the expected form.
            print(failure)
            return
        print('downloading %s' % picture)
        started = True
        # Pass schedule as a third argument to show download progress.
        urllib.urlretrieve(found[0], picture)
    except Exception:
        # Exception (not a bare except) so Ctrl-C still stops the program.
        if started and os.path.exists(picture):
            try:
                os.remove(picture)
            except OSError:
                pass  # best effort; the failure is reported below either way
        print(failure)

# Download everything linked from a module's listing pages

def Handle(URL):
    """Download every picture of every gallery listed under the module at URL.

    URL: address of the module's first listing page; listing page n (n > 1)
         is read from URL + '/page/' + n

    For each listing page the galleries are taken from GetList, and each
    gallery picture is handed to Download with the gallery title as the
    directory name.
    """
    first_listing = GetHtml(URL)
    last_page = int(GetPage(URL, first_listing, 0))
    for page in range(1, last_page + 1):
        if page == 1:
            listing = first_listing  # already fetched above
        else:
            listing = GetHtml(URL + '/page/' + str(page))
        for gallery_id, title in GetList(listing):
            # Kept in its own name: overwriting URL here would make the next
            # listing page address be built from a gallery address.
            gallery_url = BaseURL + gallery_id
            gallery_html = GetHtml(gallery_url)
            picture_count = int(GetPage(gallery_url, gallery_html, 1))
            for page_jpg in range(1, picture_count + 1):
                Download(gallery_url, str(page_jpg), str(title))

if __name__=='__main__':
    # Process each configured module in turn.  The loop variable is not named
    # "list", which would rebind the builtin for the rest of the module.
    for module_name in Module:
        Handle(BaseURL+module_name)

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航