您的位置:首页 > Web前端 > HTML

【垂直搜索引擎搭建15】HtmlParser中Filter方法(本地URL地址)

2016-08-24 21:39 441 查看
package org.algorithm;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

/**
 * Demo that loads a web page saved on the local disk and runs an HtmlParser
 * {@link NodeFilter} over it, printing every node the filter accepts.
 */
public class LocalParserDemo {

    /**
     * Parses the given resource and prints each node accepted by the filter
     * to standard output, one node per {@code println} call.
     *
     * @param url the string handed to the {@link Parser} constructor;
     *            {@code main} passes the full text of the local HTML file here
     * @throws ParserException if the parser cannot be created or the nodes
     *                         cannot be extracted
     * @throws IOException     declared for callers; kept so the signature is unchanged
     */
    public static void getContent(String url) throws ParserException, IOException {
        Parser parser = new Parser(url);
        // NOTE(review): HasAttributeFilter's single String argument is an attribute
        // name, so this looks like it selects tags that carry an attribute called
        // "div" rather than <div> elements -- confirm the intended filter.
        NodeFilter filter = new HasAttributeFilter("div");
        NodeList nodes = parser.extractAllNodesThatMatch(filter);

        if (nodes == null) {
            return;
        }
        for (int i = 0; i < nodes.size(); i++) {
            // Index with i: the previous code always read element 0, so the first
            // match was printed repeatedly and the remaining matches never appeared.
            Node node = nodes.elementAt(i);
            System.out.println(node.toString());
        }
    }

    /**
     * Loads the local web page into memory and passes its content to
     * {@link #getContent(String)}.
     *
     * @param args command-line arguments (unused)
     * @throws IOException     if the local file cannot be opened or read
     * @throws ParserException if parsing the loaded content fails
     */
    public static void main(String[] args) throws IOException, ParserException {
        String path = "c://n382738784.html";
        StringBuilder sb = new StringBuilder();

        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader br = new BufferedReader(new FileReader(new File(path)))) {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
                sb.append("\r\n");
            }
        }

        // Despite the name, this holds the page content, not an address.
        String url = sb.toString();
        getContent(url);
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  搜索引擎 filter url