您的位置:首页 > 其它

Lucene3.5 实例

2012-01-20 14:40 162 查看
方法主体

package cn.vincent;

import java.io.File;
import java.io.IOException;
import java.util.logging.SimpleFormatter;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumberTools;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKSimilarity;

/**
 * JUnit-driven demo that indexes a text file with Lucene 3.5 and runs a
 * highlighted multi-field search against the resulting index.
 */
public class LuceneSearch {

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 10;

    /** Highlight fragment size, also used as the length of the fallback excerpt. */
    private static final int FRAGMENT_SIZE = 50;

    /** Directory on disk that holds the Lucene index. */
    File filePath = new File("E:\\Workspaces\\MyEclipse for Spring\\luceneDemo\\luceneIndex");

    /** Sample data file; not referenced by any method of this class. */
    File file = new File(
            "E:\\Workspaces\\MyEclipse for Spring\\luceneDemo\\luceneDatasource\\IndexWriter addDocument's a javadoc .txt");

    /** Sample data file indexed by {@link #createIndex()}. */
    File file2 = new File("E:\\Workspaces\\MyEclipse for Spring\\luceneDemo\\luceneDatasource\\小笑话_总统的房间 Room .txt");

    /** Analyzer shared by indexing, query parsing and highlighting. */
    private Analyzer analyzer = new IKAnalyzer();

    /**
     * Adds {@link #file2} to the index.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void createIndex() throws Exception {
        buildIndex(file2);
    }

    /**
     * Converts the given file to a document and appends it to the index,
     * creating the index if it does not exist yet.
     *
     * @param myfile the file to index
     * @throws Exception if the index cannot be opened or written
     */
    private void buildIndex(File myfile) throws Exception {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

        // Open the writer before entering the try block: if the constructor
        // throws there is nothing to close, and the finally clause must not
        // hide the real failure behind a NullPointerException.
        IndexWriter indexWriter = new IndexWriter(FSDirectory.open(filePath), iwc);
        try {
            Document doc = FileToDocument.fileToDocument(myfile);
            indexWriter.addDocument(doc);
        } finally {
            // Must be closed, otherwise Lucene does not persist the index.
            indexWriter.close();
        }
    }

    /**
     * Parses the query string "room" against the "name" and "content" fields
     * and prints the matching documents.
     *
     * @throws RuntimeException wrapping any parse or search failure, so the
     *         test fails instead of silently passing
     */
    @Test
    public void luceneQuery() {
        String queryStr = "room";
        String[] fields = { "name", "content" };
        try {
            QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
            Query query = queryParser.parse(queryStr);
            search(query);
        } catch (Exception e) {
            // Rethrow unchecked (signature unchanged) so JUnit reports the failure.
            throw new RuntimeException("Lucene query failed for \"" + queryStr + "\"", e);
        }
    }

    /**
     * Runs the query against the index and prints the total hit count followed
     * by each hit. The reader and searcher are released even when searching or
     * highlighting fails.
     *
     * @param query the parsed query to execute
     * @throws Exception if the index cannot be read or highlighting fails
     */
    private void search(Query query) throws Exception {
        IndexReader r = IndexReader.open(FSDirectory.open(filePath));
        try {
            IndexSearcher indexSearcher = new IndexSearcher(r);
            try {
                indexSearcher.setSimilarity(new IKSimilarity());
                printHits(indexSearcher, query);
            } finally {
                indexSearcher.close();
            }
        } finally {
            r.close();
        }
    }

    /**
     * Executes the query, prints the total hit count and then one
     * {@link MyFile} per hit with a highlighted content excerpt.
     */
    private void printHits(IndexSearcher indexSearcher, Query query) throws Exception {
        TopDocs docs = indexSearcher.search(query, MAX_HITS);
        int totalHits = docs.totalHits;
        System.out.println("total : " + totalHits);

        // Highlighting: wrap matched terms in a red <font> tag.
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);

        Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_SIZE);
        highlighter.setTextFragmenter(fragmenter);

        for (ScoreDoc doc : docs.scoreDocs) {
            Document document = indexSearcher.doc(doc.doc);
            MyFile myFile = new MyFile();
            myFile.setName(document.get("name"));
            myFile.setPath(document.get("path"));

            // A document without a stored "size" field keeps a null size
            // instead of failing inside NumberTools.stringToLong.
            String size = document.get("size");
            if (size != null) {
                myFile.setSize(NumberTools.stringToLong(size));
            }

            myFile.setContent(excerpt(highlighter, document.get("content")));

            System.out.println(myFile);
        }
    }

    /**
     * Returns the best highlighted fragment of the content. When the
     * highlighter returns null for it, falls back to the first
     * {@link #FRAGMENT_SIZE} characters; returns null when the document has no
     * stored content at all.
     */
    private String excerpt(Highlighter highlighter, String content) throws Exception {
        if (content == null) {
            return null;
        }
        String hc = highlighter.getBestFragment(analyzer, "content", content);
        if (hc == null) {
            int endIndex = Math.min(FRAGMENT_SIZE, content.length());
            hc = content.substring(0, endIndex);
        }
        return hc;
    }
}


实体类

package cn.vincent;

/**
 * Simple mutable bean describing a file: its name, text content, size and path.
 */
public class MyFile {
    private String name;
    private String content;
    private Long size;
    private String path;

    /** @return the file name, or null if not set */
    public String getName() {
        return name;
    }

    /** @param name the file name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the text content, or null if not set */
    public String getContent() {
        return content;
    }

    /** @param content the text content */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the size, or null if not set */
    public Long getSize() {
        return size;
    }

    /** @param size the size */
    public void setSize(Long size) {
        this.size = size;
    }

    /** @return the path, or null if not set */
    public String getPath() {
        return path;
    }

    /** @param path the path */
    public void setPath(String path) {
        this.path = path;
    }

    /**
     * Returns the four properties as {@code label:value} lines, each terminated
     * by the platform line separator.
     *
     * <p>The text is returned rather than printed, so {@code toString()} has no
     * side effects and works in string concatenation, loggers and debuggers;
     * {@code System.out.println(myFile)} still shows the same four lines.
     *
     * @return a multi-line description of this object, never null
     */
    @Override
    public String toString() {
        String eol = System.getProperty("line.separator");
        StringBuilder text = new StringBuilder();
        text.append("name:").append(this.name).append(eol);
        text.append("content:").append(this.content).append(eol);
        text.append("size:").append(this.size).append(eol);
        text.append("path:").append(this.path);
        return text.toString();
    }
}


操作类:

package cn.vincent;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumberTools;

/**
 * Converts a file on disk into a Lucene {@link Document} with the fields
 * "name", "content", "size" and "path".
 */
@SuppressWarnings("deprecation")
public class FileToDocument {

    /**
     * Builds a document from the given file. All four fields are stored;
     * "name" and "content" are analyzed, "size" and "path" are indexed as-is.
     *
     * @param path the file to convert
     * @return a document describing the file
     */
    public static Document fileToDocument(File path) {
        MyFile file = new MyFile();
        file.setName(path.getName());
        file.setContent(readFileContent(path));
        file.setSize(path.length());
        file.setPath(path.getPath());

        Document doc = new Document();
        doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        doc.add(new Field("content", file.getContent(), Store.YES, Index.ANALYZED));
        // The size is stored in NumberTools' string encoding; readers decode
        // it with NumberTools.stringToLong.
        doc.add(new Field("size", NumberTools.longToString(file.getSize()), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("path", file.getPath(), Store.YES, Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Reads the whole file as text using the platform default charset.
     *
     * <p>Each line is followed by {@code '\n'} so that the last word of one
     * line and the first word of the next are not fused into a single token.
     * Reading is best-effort: on an I/O error the stack trace is printed and
     * whatever was read so far (possibly nothing) is returned.
     *
     * @param path the file to read
     * @return the text read from the file, never null
     */
    private static String readFileContent(File path) {
        StringBuilder content = new StringBuilder();
        BufferedReader bfReader = null;
        try {
            // NOTE(review): relies on the platform default charset; confirm the
            // encoding of the data files before pinning an explicit one.
            bfReader = new BufferedReader(new InputStreamReader(new FileInputStream(path)));
            for (String line = bfReader.readLine(); line != null; line = bfReader.readLine()) {
                content.append(line).append('\n');
            }
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so this covers both cases.
            e.printStackTrace();
        } finally {
            // Always release the file handle, even when reading failed.
            if (bfReader != null) {
                try {
                    bfReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return content.toString();
    }
}


写的时候发现,用来封装 Long --> String 转换的类 NumberTools 已经被标识为过时(deprecated)。这里调用 NumberTools.longToString / NumberTools.stringToLong 方法,它用 36 进制来缩短 Long 的字符串长度,

找半天似乎也没有找到替代的?有谁知道不~
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: