python自动获取图片
2013-08-02 11:29
267 查看
#!/usr/bin/python
# coding=utf-8
# post by EvilBinary (Xiao E)
# Filename: get.py
#
# Fetch a web page, collect every src="..." attribute found in it and
# download the ones that look like images (jpg / jpeg / gif / png) into
# ./img/, one thread per image.
#
# Usage: get.py <url>
#
# The script runs on both Python 2 and Python 3: the Python 2 module names
# (urllib2, cookielib, thread) are kept and aliased to their Python 3
# equivalents when the Python 2 modules are not available.
import contextlib
import fnmatch
import hashlib
import os
import random
import re
import sys
import threading
import time

try:  # Python 2
    import cookielib
    import thread
    import urllib
    import urllib2
    from urlparse import urljoin
except ImportError:  # Python 3
    import _thread as thread
    import http.cookiejar as cookielib
    import urllib
    import urllib.request as urllib2
    from urllib.parse import urljoin

# Cookie jar shared by every opener built in getUrlContent().
cookie = cookielib.CookieJar()

# Pool of User-Agent strings; one is picked at random per page request.
USER_AGENTS = (
    "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)",
    "Internet Explorer 7 (Windows Vista); Mozilla/4.0 ",
    "Google Chrome 0.2.149.29 (Windows XP)",
    "Opera 9.25 (Windows Vista)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1)",
    "Opera/8.00 (Windows NT 5.1; U; en)",
)

# File extensions (lower-case, without the dot) that are downloaded.
IMAGE_EXTENSIONS = ('jpg', 'jpeg', 'gif', 'png')

# Matches any src="..." attribute, not only those of <img> tags; the
# extension filter in main() discards the non-image ones.
SRC_ATTR_RE = re.compile(r'src="(.*?)"')


def getUrlContent(url, action):
    """Fetch ``url + action`` and return the raw response body.

    A cookie-aware opener with a randomly chosen User-Agent is built and
    installed globally, so later ``urllib2.urlopen`` calls (for example in
    ``downLoadImg``) reuse it.

    Returns the body as read from the response, or ``None`` if anything
    went wrong (the error is printed, not raised).
    """
    try:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
        agent = random.choice(USER_AGENTS)
        print(agent)
        opener.addheaders = [('User-agent', agent)]
        urllib2.install_opener(opener)
        print("GET " + url + action)
        request = urllib2.Request(url + action)
        # closing() releases the connection even if read() raises.
        with contextlib.closing(urllib2.urlopen(request)) as response:
            return response.read()
    except Exception as e:
        print('Error')
        print(e)
        return None


def getImageUrl(content):
    """Return the list of all ``src="..."`` attribute values in *content*.

    *content* may be text, bytes or ``None`` (the value ``getUrlContent``
    returns on failure). An empty list is returned when there is nothing
    to scan or nothing matches.
    """
    if not content:
        print("no fount image url")
        return []
    # On Python 3 the page arrives as bytes; the pattern is a text pattern.
    # On Python 2 ``bytes is str`` so this branch is skipped.
    if isinstance(content, bytes) and not isinstance(content, str):
        content = content.decode('utf-8', 'ignore')
    matches = SRC_ATTR_RE.findall(content)
    if not matches:
        print("no fount image url")
        return []
    print("fount: %d" % len(matches))
    return matches


def _ensure_dir(path):
    """Create directory *path* (and parents) if it does not exist yet."""
    if not path or os.path.isdir(path):
        return
    try:
        os.makedirs(path)
    except OSError:
        # Another download thread may have created it in the meantime.
        if not os.path.isdir(path):
            raise


def downLoadImg(url, name):
    """Download *url* and store it as *name* below the current directory.

    Missing parent directories of the target file are created. Errors are
    printed, not raised, so one failed image does not stop the others.
    """
    try:
        target = os.path.join(os.getcwd(), name)
        _ensure_dir(os.path.dirname(target))
        request = urllib2.Request(url)
        with contextlib.closing(urllib2.urlopen(request)) as response:
            data = response.read()
        with open(target, "wb") as out:
            out.write(data)
    except Exception as e:
        print('Error %s' % (e,))


class MyThread(threading.Thread):
    """Worker thread that downloads a single image."""

    def __init__(self, threadname, imageUrl, imageName):
        threading.Thread.__init__(self, name=threadname)
        self.imgUrl = imageUrl
        self.imgName = imageName

    def run(self):
        """Download the image, pause one second, then report completion."""
        try:
            downLoadImg(self.imgUrl, self.imgName)
            time.sleep(1)
            print('%s is running......done.' % self.name)
        except Exception as e:
            print('Error %s' % (e,))


def _to_bytes(text):
    """Return *text* as bytes (hashlib needs bytes on Python 3)."""
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8')


def main(argv):
    """Entry point: download all images referenced by the page ``argv[1]``."""
    if len(argv) < 2:
        print("usage : %s <url>" % argv[0])
        sys.exit(1)
    page_url = argv[1]
    content = getUrlContent(page_url, '')
    for src in getImageUrl(content):
        # Resolve relative references against the page URL; absolute
        # URLs pass through urljoin unchanged.
        img_url = urljoin(page_url, src)
        img_name = img_url.split('/')[-1]
        img_ext = img_url.split('.')[-1]
        print("%s %s %s" % (img_url, img_name, img_ext))
        if img_ext.lower() not in IMAGE_EXTENSIONS:
            continue
        # Prefix with a hash of the full URL so equally named files from
        # different paths do not overwrite each other.
        url_hash = hashlib.sha1(_to_bytes(img_url)).hexdigest()
        worker = MyThread('evilbinary-' + img_name, img_url,
                          'img/' + url_hash + '_' + img_name)
        worker.start()


if __name__ == "__main__":
    try:
        main(sys.argv)
    except Exception as e:
        print('Error %s' % (e,))
相关文章推荐
- 自动获取wordpress日志中的第一张图片作为缩略图
- python获取糗百图片代码实例
- python爬虫自动提交HDU并获取AC状态(p3+request+BeautifulSoup)
- python编写的自动获取代理IP列表的爬虫-chinaboywg-ChinaUnix博客
- Python通过PIL获取图片主要颜色并和颜色库进行对比的方法
- python爬虫(9)获取动态搞笑图片
- 我的第一个python爬虫程序(从百度贴吧自动下载图片)
- iOS开发- 相机(摄像头)获取到的图片自动旋转90度解决办法
- Python实现获取当前公网ip并且自动断开宽带连接功能
- 需求(Java);利用Jsoup架包获取指定网页的全部图片,并自动下载到指定文件夹中
- Python 获取sina首页所有jpg图片
- 广告轮换动态获取图片flash,asp代码从sql数据库自动获取。
- python爬虫获取京东手机图片
- python爬虫获取京东手机图片的图文教程
- python 获取网页图片
- Python实现简单的获取图片爬虫功能示例
- python获取糗百图片代码实例
- 如何使用python自动登录路由器且获取页面内容
- iOS开发- 相机(摄像头)获取到的图片自动旋转90度解决办法
- python获取网页中所有图片并筛选指定分辨率的方法