您的位置:首页 > 理论基础 > 计算机网络

通过代理抓取网页code方法 proxy httpurlconnection

2015-01-22 13:40 656 查看
非常简单,非常容易用——可以直接连接

package com.wanju.project001.zonghe.common.util.wjgate;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility for downloading the text of a web page through a fixed HTTP proxy
 * using {@link HttpURLConnection}.
 *
 * <p>All fetch methods return {@code null} on any failure (bad URL, unsupported
 * encoding, I/O error, HTTP status &gt;= 400) and report the problem on standard
 * output / standard error instead of throwing.
 */
public class WJWrapProxyTool  {

    /** Host name of the HTTP proxy every request is routed through (placeholder value). */
    private static final String PROXY_HOST = "sswwdwdddssxxxxx";

    /** TCP port of the HTTP proxy. */
    private static final int PROXY_PORT = 8080;

    /** Connect and read timeout, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 60000;

    /** User-Agent header sent with every request (pretends to be an old IE browser). */
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)";

    /**
     * Fetches the body of {@code url} through the configured HTTP proxy.
     *
     * <p>The body is read line by line and the lines are concatenated WITHOUT
     * their line terminators, so the result is a single line of text.
     *
     * @param url      the page to download; {@code null} yields {@code null}
     * @param encode   name of the charset used to decode the response body
     * @param proxyflg currently unused; kept for backward compatibility
     * @return the decoded page content, or {@code null} if the encoding is not
     *         supported, the connection fails, or the server answers with a
     *         status code of 400 or above
     */
    public static String getHtmlContent(URL url, String encode, String proxyflg) {
        if (url == null) {
            System.out.println("error: url is null");
            return null;
        }

        // Validate the charset up front so a bad name fails fast, before any network I/O.
        java.nio.charset.Charset charset = resolveCharset(encode);
        if (charset == null) {
            System.out.println("error: unsupported encoding: " + encode);
            return null;
        }

        HttpURLConnection con = null;
        BufferedReader reader = null;
        try {
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(PROXY_HOST, PROXY_PORT));
            con = (HttpURLConnection) url.openConnection(proxy);
            con.setRequestProperty("User-Agent", USER_AGENT);
            con.setConnectTimeout(TIMEOUT_MILLIS);
            con.setReadTimeout(TIMEOUT_MILLIS);

            // getResponseCode() returns -1 when the response is not valid HTTP.
            int responseCode = con.getResponseCode();
            if (responseCode == -1) {
                System.out.println(url.toString() + " : connection is failure...");
                return null;
            }
            if (responseCode >= 400) {
                // NOTE(review): the prefix of this message is mis-encoded in the source file;
                // it is reproduced unchanged because the original text cannot be recovered.
                System.out.println("����ʧ��:get response code: " + responseCode);
                return null;
            }

            reader = new BufferedReader(new InputStreamReader(con.getInputStream(), charset));
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
            return content.toString();
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("error: " + url.toString());
            return null;
        } finally {
            closeQuietly(reader);
            // con stays null when openConnection itself failed.
            if (con != null) {
                con.disconnect();
            }
        }
    }

    /**
     * Fetches a page given its address as a string.
     *
     * <p>If {@code url} has no {@code http://} or {@code https://} prefix
     * (compared case-insensitively), {@code http://} is prepended.
     *
     * @param url    the page address, with or without a scheme; {@code null} yields {@code null}
     * @param encode name of the charset used to decode the response body
     * @return the decoded page content, or {@code null} on any failure
     */
    public static String getHtmlContent(String url, String encode) {
        if (url == null) {
            return null;
        }
        if (!hasHttpScheme(url)) {
            url = "http://" + url;
        }
        try {
            URL rUrl = new URL(url);
            return getHtmlContent(rUrl, encode, "proxy");
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Small manual check: downloads a page and prints what came back. */
    public static void main(String[] args) {
        String content = getHtmlContent("http://www.baidu.com", "gbk");
        // NOTE(review): the label says "length" but the content itself is printed
        // ("null" on failure) — confirm which one was intended.
        System.out.println("content length"+content);
    }

    /** Returns true when {@code url} starts with "http://" or "https://", ignoring case. */
    private static boolean hasHttpScheme(String url) {
        return url.regionMatches(true, 0, "http://", 0, 7)
                || url.regionMatches(true, 0, "https://", 0, 8);
    }

    /** Looks up a charset by name; returns null when the name is null, illegal or unsupported. */
    private static java.nio.charset.Charset resolveCharset(String encode) {
        try {
            return java.nio.charset.Charset.forName(encode);
        } catch (IllegalArgumentException e) {
            // Covers IllegalCharsetNameException, UnsupportedCharsetException and a null name.
            return null;
        }
    }

    /** Closes the reader if it was opened, ignoring any failure raised by close(). */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails; the content was already read or abandoned.
        }
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐