您的位置:首页 > 其它

[工具类] 获取URL编码1

2015-11-24 16:56 351 查看
package com.claw.util.html;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility for guessing the character encoding of a byte stream, or of the
 * document served at a URL, by handing a leading sample of the bytes to
 * {@code BytesEncodingDetect}.
 *
 * <p>Both {@code getCharset} overloads are best-effort: on any failure they
 * print the stack trace and fall back to {@code "UTF-8"}.
 */
public class Charset {

    /** Encoding name returned when detection is impossible or fails. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Maximum number of leading bytes handed to the detector. */
    private static final int SAMPLE_SIZE = 1024;

    /** Connect and read timeout, in milliseconds, for URL-based detection. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * Manual smoke-test entry point. It only builds a list of sample URLs;
     * the loop that used them is disabled (see the comment in the body).
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> list = new ArrayList<String>();

        list.add("http://li200429.iteye.com/blog/1608758");
        list.add("http://blog.csdn.net/vic0228/article/details/49634311");
        list.add("http://www.zhihu.com/");
        list.add("http://www.sohu.com/");
        list.add("http://blog.163.com/wenchangqing_live/blog/static/173722309201182044545864/");

        // Disabled demo loop: it relies on getHTML/getTitle, which are not
        // defined in this class.
        //
        // for (String url : list) {
        //     String html = getHTML(url);
        //     String title = getTitle(html);
        //     System.out.println("url:"+url+" ----- title:"+title);
        //     if(title.equals("")){
        //         System.out.println(html);
        //     }
        // }
    }

    /**
     * Guesses the encoding of the data at the current position of {@code in}.
     *
     * <p>Consumes up to {@value #SAMPLE_SIZE} bytes from the stream and does
     * not close it. A detector result of {@code "GB-2312"} is reported as
     * {@code "GBK"}.
     *
     * @param in stream to sample; a {@code null} or empty stream yields the default
     * @return the detected encoding name, or {@code "UTF-8"} if the stream is
     *         empty or detection fails
     */
    public static String getCharset(InputStream in) {
        String charset = DEFAULT_CHARSET;
        try {
            String encode = detectRawEncoding(in);
            if (encode != null) {
                charset = normalize(encode);
            }
        } catch (Exception e) {
            // Best-effort contract: report the problem and keep the default.
            e.printStackTrace();
        }
        return charset;
    }

    /**
     * Fetches {@code urlStr} over HTTP and guesses the encoding of the
     * response body from its first {@value #SAMPLE_SIZE} bytes.
     *
     * <p>Original author's note: has problems with 404 responses; temporarily
     * disabled. Only a 200 response is sampled; every other status returns
     * the default.
     *
     * <p>The status code and the raw detector result are printed to standard
     * output, as is the URL when an error occurs.
     *
     * @param urlStr absolute HTTP URL to probe
     * @return the detected encoding name, or {@code "UTF-8"} on a non-200
     *         status, an empty body, or any error
     */
    public static String getCharset(String urlStr) {
        String charset = DEFAULT_CHARSET;
        HttpURLConnection conn = null;
        BufferedInputStream in = null;
        try {
            URL url = new URL(urlStr);
            conn = (HttpURLConnection) url.openConnection();
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            // Without a read timeout a stalled server blocks this call forever.
            conn.setReadTimeout(TIMEOUT_MILLIS);
            conn.setRequestProperty("User-Agent",
                    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)");
            conn.connect();

            int status = conn.getResponseCode();
            System.out.println(status);
            // getInputStream() throws on error statuses such as 404, so the
            // body is only opened for a successful response.
            if (status == HttpURLConnection.HTTP_OK) {
                in = new BufferedInputStream(conn.getInputStream());
                String encode = detectRawEncoding(in);
                if (encode != null) {
                    System.out.println("encode:" + encode);
                    charset = normalize(encode);
                }
            }
        } catch (Exception e) {
            System.out.println(urlStr);
            e.printStackTrace();
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    System.out.println(urlStr);
                    e.printStackTrace();
                }
            }
            if (conn != null) {
                conn.disconnect();
            }
        }
        return charset;
    }

    /**
     * Reads up to {@value #SAMPLE_SIZE} bytes from {@code in} and asks the
     * detector for the encoding of exactly the bytes that were read.
     *
     * @return the detector's name for the encoding, or {@code null} if the
     *         stream yielded no bytes
     */
    private static String detectRawEncoding(InputStream in) throws IOException {
        byte[] buffer = new byte[SAMPLE_SIZE];
        int filled = 0;
        // A single read() may return fewer bytes than are available, so keep
        // reading until the sample is full or the stream ends.
        while (filled < buffer.length) {
            int n = in.read(buffer, filled, buffer.length - filled);
            if (n < 0) {
                break;
            }
            filled += n;
        }
        if (filled == 0) {
            return null;
        }

        // Pass only the bytes actually read; zero padding from the unused
        // tail of the buffer would otherwise be analysed as content.
        byte[] sample = new byte[filled];
        System.arraycopy(buffer, 0, sample, 0, filled);

        BytesEncodingDetect detector = new BytesEncodingDetect();
        return BytesEncodingDetect.nicename[detector.detectEncoding(sample)];
    }

    /** Maps the detector's {@code "GB-2312"} result to {@code "GBK"}; other names pass through. */
    private static String normalize(String encode) {
        return "GB-2312".equals(encode) ? "GBK" : encode;
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: