NekoHTML,JDom,httpClient综合应用
2008-12-26 19:25
281 查看
1. 获取html 内容
/**
 * Fetches the page at the given URL with an HTTP GET and returns its body as text.
 *
 * <p>The body is decoded with the charset reported by the response
 * ({@code getResponseCharSet()}) instead of the JVM's platform default, so the result
 * no longer depends on the machine the code runs on. A non-200 status is reported on
 * {@code System.err}, but the body is still read and returned. I/O and protocol
 * failures are printed and swallowed (best effort), in which case the empty string
 * is returned.
 *
 * @param url address of the page to fetch
 * @return the decoded response body, or an empty string when the request fails or
 *         the response carries no body
 */
private String getPageContent(String url) {
    String content = "";
    HttpClient httpClient = new HttpClient();
    GetMethod getMethod = new GetMethod(url);
    // Install the library's default retry handler for this request.
    getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());
    try {
        int statusCode = httpClient.executeMethod(getMethod);
        if (statusCode != HttpStatus.SC_OK) {
            System.err.println("Method failed: "+ getMethod.getStatusLine());
        }
        byte[] responseBody = getMethod.getResponseBody();
        // The body can be absent; decoding a null array would throw a NullPointerException.
        if (responseBody != null) {
            // An unsupported charset name raises UnsupportedEncodingException,
            // which is an IOException and is handled below.
            content = new String(responseBody, getMethod.getResponseCharSet());
        }
    } catch (HttpException e) {
        System.out.println("Please check your provided http address!");
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Always hand the connection back, whether or not the request succeeded.
        getMethod.releaseConnection();
    }
    return content;
}
所用依赖包
<dependency>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
<version>3.1</version>
</dependency>
2 NekoHTML 标签补偿应用
// Parses an HTML fragment with NekoHTML and obtains the resulting W3C DOM document.
// The fragment appears to be malformed on purpose (stray '<' before the second <td>)
// to show the parser repairing the markup.
// NOTE(review): `parser` is not declared in this snippet; presumably it is an
// org.cyberneko.html.parsers.DOMParser created by the surrounding code — confirm.
try {
    String content = "<tr><td>test</td><<td>hello</td></tr>";
    InputSource inputSource = new InputSource( new StringReader( content ));
    parser.parse( inputSource );
    org.w3c.dom.Document doc = parser.getDocument();
} catch ( Exception e) {
    e.printStackTrace();
}
所用依赖包
<dependency>
<groupId>net.sourceforge.nekohtml</groupId>
<artifactId>nekohtml</artifactId>
<version>1.9.9</version>
</dependency>
3 org.w3c.dom.Document 转 org.jdom.Document
// Take the W3C DOM tree from the parser and wrap it in a JDOM document.
org.w3c.dom.Document doc = parser.getDocument();
org.jdom.Document jDoc = new DOMBuilder().build(doc);
所用依赖包
<dependency>
<groupId>org.jdom</groupId>
<artifactId>jdom</artifactId>
<version>1.1</version>
</dependency>
4 jdom递归访问
// Start the recursive walk at the root element of the JDOM document.
process (jDoc.getRootElement());
/**
 * Walks a JDOM element tree depth-first, parent before children.
 *
 * <p>Each element is passed to {@code inspect} before its content is examined.
 * Child elements are processed recursively, the text of comment nodes is printed
 * to standard output, and every other kind of content is skipped.
 *
 * @param element the element to visit together with everything nested inside it
 */
public void process(Element element) {
    inspect(element);
    // getContent() yields all child nodes of the element, not only child elements.
    for (Object node : element.getContent()) {
        if (node instanceof Element) {
            // Child element: descend into it.
            process((Element) node);
        } else if (node instanceof Comment) {
            // Comment node: print its text.
            System.out.println(((Comment) node).getText());
        }
    }
}
/**
 * Per-element hook; the body is currently empty.
 *
 * @param element the element to examine
 */
public void inspect(Element element){ // element is an org.jdom.Element
// placeholder: per-element work goes here
//
}
/**
 * Fetches the page at the given URL with an HTTP GET and returns its body as text.
 *
 * <p>The body is decoded with the charset reported by the response
 * ({@code getResponseCharSet()}) instead of the JVM's platform default, so the result
 * no longer depends on the machine the code runs on. A non-200 status is reported on
 * {@code System.err}, but the body is still read and returned. I/O and protocol
 * failures are printed and swallowed (best effort), in which case the empty string
 * is returned.
 *
 * @param url address of the page to fetch
 * @return the decoded response body, or an empty string when the request fails or
 *         the response carries no body
 */
private String getPageContent(String url) {
    String content = "";
    HttpClient httpClient = new HttpClient();
    GetMethod getMethod = new GetMethod(url);
    // Install the library's default retry handler for this request.
    getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());
    try {
        int statusCode = httpClient.executeMethod(getMethod);
        if (statusCode != HttpStatus.SC_OK) {
            System.err.println("Method failed: "+ getMethod.getStatusLine());
        }
        byte[] responseBody = getMethod.getResponseBody();
        // The body can be absent; decoding a null array would throw a NullPointerException.
        if (responseBody != null) {
            // An unsupported charset name raises UnsupportedEncodingException,
            // which is an IOException and is handled below.
            content = new String(responseBody, getMethod.getResponseCharSet());
        }
    } catch (HttpException e) {
        System.out.println("Please check your provided http address!");
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Always hand the connection back, whether or not the request succeeded.
        getMethod.releaseConnection();
    }
    return content;
}
所用依赖包
<dependency>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
<version>3.1</version>
</dependency>
2 NekoHTML 标签补偿应用
// Parses an HTML fragment with NekoHTML and obtains the resulting W3C DOM document.
// The fragment appears to be malformed on purpose (stray '<' before the second <td>)
// to show the parser repairing the markup.
// NOTE(review): `parser` is not declared in this snippet; presumably it is an
// org.cyberneko.html.parsers.DOMParser created by the surrounding code — confirm.
try {
    String content = "<tr><td>test</td><<td>hello</td></tr>";
    InputSource inputSource = new InputSource( new StringReader( content ));
    parser.parse( inputSource );
    org.w3c.dom.Document doc = parser.getDocument();
} catch ( Exception e) {
    e.printStackTrace();
}
所用依赖包
<dependency>
<groupId>net.sourceforge.nekohtml</groupId>
<artifactId>nekohtml</artifactId>
<version>1.9.9</version>
</dependency>
3 org.w3c.dom.Document 转 org.jdom.Document
// Take the W3C DOM tree from the parser and wrap it in a JDOM document.
org.w3c.dom.Document doc = parser.getDocument();
org.jdom.Document jDoc = new DOMBuilder().build(doc);
所用依赖包
<dependency>
<groupId>org.jdom</groupId>
<artifactId>jdom</artifactId>
<version>1.1</version>
</dependency>
4 jdom递归访问
// Start the recursive walk at the root element of the JDOM document.
process (jDoc.getRootElement());
/**
 * Walks a JDOM element tree depth-first, parent before children.
 *
 * <p>Each element is passed to {@code inspect} before its content is examined.
 * Child elements are processed recursively, the text of comment nodes is printed
 * to standard output, and every other kind of content is skipped.
 *
 * @param element the element to visit together with everything nested inside it
 */
public void process(Element element) {
    inspect(element);
    // getContent() yields all child nodes of the element, not only child elements.
    for (Object node : element.getContent()) {
        if (node instanceof Element) {
            // Child element: descend into it.
            process((Element) node);
        } else if (node instanceof Comment) {
            // Comment node: print its text.
            System.out.println(((Comment) node).getText());
        }
    }
}
/**
 * Per-element hook; the body is currently empty.
 *
 * @param element the element to examine
 */
public void inspect(Element element){ // element is an org.jdom.Element
// placeholder: per-element work goes here
//
}
相关文章推荐
- 【HTML】框架页、表单与JavaScript的综合应用
- Web综合应用实验(html css javascript)
- Html.AntiForgeryToken() 防止CSRF攻击 的AJaX应用
- Html.AntiForgeryToken() 防止CSRF攻击 的AJaX应用
- JFreeChart综合应用
- Ajax——jquery快速实现html、json、xml的ajax应用
- OAuth2.0学习(1-10)新浪开放平台微博认证-手机应用授权和refresh_token刷新access_token
- J2EE综合应用
- HTML 列表中的dl,dt,dd,ul,li,ol区别及应用
- 深入浅出VC++串口编程(六) 综合实例之短信应用开发
- [置顶] 第2讲:LR mobile http/html协议在手机应用性能测试中的实现(二)
- 【学习】如何制作手机端html模板(REM的实际应用)
- HTML 5 的 Canvas 中应用卷积矩阵对图像处理
- Html网页表格结构化标记的应用
- html email类型的应用
- ANDROID L——Material Design综合应用(Demo)
- eclipse+jboss xdoclet ejb-sevlet综合应用(一)
- HTML 5移动应用案例集锦(收集中)
- JAVA创建、读写XML文档(应用JDOM包)
- html+css+jQuery实现多种图片简单切换功能大综合