[工具类] 获取URL编码1
2015-11-24 16:56
351 查看
package com.claw.util.html;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Guesses the character encoding of a byte stream, or of the resource served
 * at a URL, by handing the leading bytes to {@code BytesEncodingDetect}.
 *
 * <p>Both lookups are best effort: a failure is reported on the console and
 * the default {@code "UTF-8"} is returned instead of an exception being
 * propagated to the caller.
 */
public class Charset {

    /** Value returned whenever detection is impossible or fails. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Maximum number of leading bytes handed to the detector. */
    private static final int SAMPLE_SIZE = 1024;

    /** Connect timeout for {@link #getCharset(String)}, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Read timeout for {@link #getCharset(String)}, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 10000;

    /** User-Agent header sent with the HTTP request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)";

    /**
     * Manual test entry point. It only assembles a list of sample URLs and
     * performs no network access.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> list = new ArrayList<String>();
        list.add("http://li200429.iteye.com/blog/1608758");
        list.add("http://blog.csdn.net/vic0228/article/details/49634311");
        list.add("http://www.zhihu.com/");
        list.add("http://www.sohu.com/");
        list.add("http://blog.163.com/wenchangqing_live/blog/static/173722309201182044545864/");
        // The driver loop that fetched each page and printed its title was
        // already disabled; it called getHTML/getTitle, which this class does
        // not define.
    }

    /**
     * Detects the encoding of the data at the head of {@code in}.
     *
     * <p>Up to 1024 bytes are consumed from the stream; the stream is neither
     * reset nor closed. Only the bytes actually read are passed to the
     * detector, so short input is no longer judged on zero padding.
     *
     * @param in stream to sample
     * @return the detected encoding name, with {@code "GB-2312"} reported as
     *         {@code "GBK"}; {@code "UTF-8"} when the stream is empty or
     *         detection fails
     */
    public static String getCharset(InputStream in) {
        String charset = DEFAULT_CHARSET;
        try {
            byte[] sample = readSample(in, SAMPLE_SIZE);
            if (sample.length > 0) {
                String encode = detectEncodingName(sample);
                if (encode != null) {
                    charset = normalize(encode);
                }
            }
        } catch (Exception e) {
            // Best effort: keep the default rather than failing the caller.
            e.printStackTrace();
        }
        return charset;
    }

    /**
     * Detects the encoding of the resource at {@code urlStr} from the first
     * bytes of its HTTP response body.
     *
     * <p>The original author flagged this method as having a problem with 404
     * responses and as temporarily disabled. Any status other than 200 yields
     * the default. The response status, the raw detector result and, on
     * failure, the URL are printed to standard output.
     *
     * @param urlStr absolute HTTP URL to probe
     * @return the detected encoding name, with {@code "GB-2312"} reported as
     *         {@code "GBK"}; {@code "UTF-8"} on a non-200 status, an empty
     *         body, or any error
     */
    public static String getCharset(String urlStr) {
        String charset = DEFAULT_CHARSET;
        HttpURLConnection conn = null;
        BufferedInputStream in = null;
        try {
            URL url = new URL(urlStr);
            conn = (HttpURLConnection) url.openConnection();
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            // Without a read timeout a stalled server blocks this call forever.
            conn.setReadTimeout(READ_TIMEOUT_MS);
            conn.setRequestProperty("User-Agent", USER_AGENT);
            conn.connect();
            int status = conn.getResponseCode();
            System.out.println(status);
            if (status == HttpURLConnection.HTTP_OK) {
                in = new BufferedInputStream(conn.getInputStream());
                byte[] sample = readSample(in, SAMPLE_SIZE);
                if (sample.length > 0) {
                    String encode = detectEncodingName(sample);
                    System.out.println("encode:" + encode);
                    if (encode != null) {
                        charset = normalize(encode);
                    }
                }
            }
        } catch (Exception e) {
            System.out.println(urlStr);
            e.printStackTrace();
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    System.out.println(urlStr);
                    e.printStackTrace();
                }
            }
            if (conn != null) {
                conn.disconnect();
            }
        }
        return charset;
    }

    /** Reads up to {@code limit} bytes, returning an array sized to what was read. */
    private static byte[] readSample(InputStream in, int limit) throws IOException {
        byte[] buffer = new byte[limit];
        int filled = 0;
        // One read() may return fewer bytes than are available, so keep
        // reading until the buffer is full or the stream ends.
        while (filled < limit) {
            int n = in.read(buffer, filled, limit - filled);
            if (n <= 0) {
                break;
            }
            filled += n;
        }
        if (filled == limit) {
            return buffer;
        }
        byte[] exact = new byte[filled];
        System.arraycopy(buffer, 0, exact, 0, filled);
        return exact;
    }

    /** Runs the detector on {@code sample} and returns its display name for the result. */
    private static String detectEncodingName(byte[] sample) {
        // The detector is created before nicename is read, as in the original.
        BytesEncodingDetect detector = new BytesEncodingDetect();
        return BytesEncodingDetect.nicename[detector.detectEncoding(sample)];
    }

    /** Maps the detector's {@code "GB-2312"} result to {@code "GBK"}; other names pass through. */
    private static String normalize(String encode) {
        if (encode.equals("GB-2312")) {
            return "GBK";
        }
        return encode;
    }
}
相关文章推荐
- sql:oracle, CURSOR
- nyoj--1057--寻找最大数(三)(贪心)
- matlab load
- struts调用支付宝接口。
- python文件头--文件编码指定
- IOS 观察者模式
- volatile关键字作用
- PDP Context 激活流程(UE发起)
- angularJS学习之路(二十五)---创建服务的五大方法---provider
- 线要素的节点提取
- 将线图层在折点处断开
- 将CAD数据转为shp
- 如何在路由器的局域网下使用IIS发布网页
- 关于C#中对象用作属性
- ASP.net(NVelocity)中浏览器端与服务器端频繁交互传值的问题
- 消除字号标签<h1><h2><h3>的自动换行
- 关于select标签的value
- VS中为代码添加虚线
- LightOJ 1036 - A Refining Company(DP)
- ODBC位置