通过设置Referer反"反盗链"
2016-01-28 19:27
323 查看
package cn.searchphoto.util;

import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.zip.GZIPInputStream;

/**
 * Downloads images from remote web sites. Each request carries a
 * {@code Referer} header equal to the image URL itself, which is the trick
 * used here to get past anti-hotlinking checks.
 *
 * @author JAVA世纪网(java2000.net, laozizhu.com)
 */
public class ImageDownloader {

  /**
   * Downloads the resource at {@code imgurl} into the file {@code f}.
   *
   * <p>If the response declares a {@code gzip} content encoding, the body is
   * decompressed before it is written. Any failure (malformed URL, I/O error,
   * {@code null} argument, ...) is reported with a stack trace on standard
   * error and signalled by a {@code null} return value; no exception escapes.
   *
   * @param imgurl URL of the image to download
   * @param f destination file; it is created or overwritten
   * @return {@code f} on success, {@code null} on failure
   */
  public static File download(String imgurl, File f) {
    InputStream is = null;
    OutputStream os = null;
    try {
      URL url = new URL(imgurl);
      URLConnection con = url.openConnection();

      // No Host header is set by hand. The previous string slicing assumed an
      // "http://" prefix and a host of at least three characters, so it
      // produced a wrong value for https URLs and short host names. The
      // connection supplies Host from the URL itself.
      con.setRequestProperty("Referer", imgurl);

      is = con.getInputStream();
      String encoding = con.getContentEncoding();
      if (encoding != null && encoding.equalsIgnoreCase("gzip")) {
        // If this constructor throws, 'is' still refers to the raw stream and
        // is closed by the finally block below.
        is = new GZIPInputStream(is);
      }

      os = new FileOutputStream(f);
      byte[] bs = new byte[1024];
      int len;
      while ((len = is.read(bs)) != -1) {
        os.write(bs, 0, len);
      }
      return f;
    } catch (Exception ex) {
      // Boundary catch: callers rely on null (never an exception) on failure.
      ex.printStackTrace();
      return null;
    } finally {
      // Runs on every path, including failures while opening either stream,
      // so neither stream can leak.
      closeQuietly(os);
      closeQuietly(is);
    }
  }

  /** Closes {@code c} if it is non-null, ignoring any error raised by the close. */
  private static void closeQuietly(Closeable c) {
    if (c == null) {
      return;
    }
    try {
      c.close();
    } catch (Exception ignored) {
      // Best-effort cleanup: a close failure must not mask the download result.
    }
  }
}
#1 cookie的处理 import urllib2, cookielib cookie_support= urllib2.HTTPCookieProcessor(cookielib.CookieJar()) opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler) urllib2.install_opener(opener) content = urllib2.urlopen('http://XXXX').read() #2 用代理和cookie opener = urllib2.build_opener(proxy_support, cookie_support, urllib2.HTTPHandler) #3 表单的处理 import urllib postdata=urllib.urlencode({ 'username':'XXXXX', 'password':'XXXXX', 'continueURI':'http://www.verycd.com/', 'fk':fk, 'login_submit':'登录' }) req = urllib2.Request( url = 'http://secure.verycd.com/signin/*/http://www.verycd.com/', data = postdata ) result = urllib2.urlopen(req).read() #4 伪装成浏览器访问 headers = { 'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6' } req = urllib2.Request( url = 'http://secure.verycd.com/signin/*/http://www.verycd.com/', data = postdata, headers = headers ) #5 反”反盗链” headers = { 'Referer':'http://www.cnbeta.com/articles' }
#6 多线程并发抓取 from threading import Thread from Queue import Queue from time import sleep #q是任务队列 #NUM是并发线程总数 #JOBS是有多少任务 q = Queue() NUM = 2 JOBS = 10 #具体的处理函数,负责处理单个任务 def do_somthing_using(arguments): print arguments #这个是工作进程,负责不断从队列取数据并处理 def working(): while True: arguments = q.get() do_somthing_using(arguments) sleep(1) q.task_done() #fork NUM个线程等待队列 for i in range(NUM): t = Thread(target=working) t.setDaemon(True) t.start() #把JOBS排入队列 for i in range(JOBS): q.put(i) #等待所有JOBS完成 q.join()
相关文章推荐
- 通过设置Referer反"反盗链"
- 前端基本功之选择题
- Referer反反盗链
- IE, FireFox, Opera 浏览器支持CSS实现Alpha透明的方法 兼容问题
- Referer反反盗链
- 11、BaseJsonRes
- The different of bit Compiler
- 对Communication between native and React Native官方例子的纠正
- CSS3—六边形
- javascript方式实现无缝滚动(两种方式)
- 理解js的prototype原型对象
- 2016年1月-前端开发月刊
- CSS3—三角形
- AngularJS 学习笔记
- alertify、js、css 使用简介
- node.js学习笔记之React
- javascript高级程序设计第三章
- 【转】CSS实现兼容性的渐变背景(gradient)效果
- Jquery中的bind(),live(),delegate(),on()绑定事件方式
- 135 js 高程6.1