您的位置:首页 > 其它

Lucene 4.3 简单创建和查询索引实例

2013-11-29 16:25 513 查看
1.创建索引实例代码

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.swing.filechooser.FileFilter;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Minimal Lucene 4.3 indexing example.
 *
 * <p>Walks a data directory recursively, builds one {@link Document} per accepted
 * file and adds the documents to an index through an {@link IndexWriter} that is
 * configured with an {@link IKAnalyzer}.
 */
public class Indexer {

    /** Writer for the index directory passed to the constructor. */
    private IndexWriter writer;

    /** Analyzer handed to the writer configuration. */
    private Analyzer analyzer;

    /** Documents collected for the current {@code indexFile} call; reset on every call. */
    List<Document> documents = new ArrayList<Document>();

    /**
     * Indexes the sample data directory, commits, and prints the number of
     * documents in the index.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        String dataDir = "E:/lucene/data";
        String indexDir = "E:/lucene/index";
        Indexer indexer = null;
        try {
            indexer = new Indexer(indexDir);
            indexer.index(dataDir, new TextFilesFilter());
            indexer.writer.commit();
            System.out.println(indexer.writer.numDocs());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always close the writer, even when indexing or commit failed,
            // so the writer's resources are not left open.
            if (indexer != null) {
                try {
                    indexer.writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Opens (or creates) the index in {@code indexDir} and configures the writer.
     *
     * @param indexDir path of the index directory
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        analyzer = new IKAnalyzer();
        LogMergePolicy mergePolicy = new LogDocMergePolicy();
        // Basic index configuration.
        // Merge factor: how often segments are merged while documents are added.
        // Smaller values make indexing slower; larger values make it faster.
        // Values above 10 suit batch indexing.
        mergePolicy.setMergeFactor(30);
        // Maximum number of documents in a merged segment.
        // Smaller values favour the speed of incremental additions;
        // larger values suit batch indexing and faster searches.
        mergePolicy.setMaxMergeDocs(5000);
        // Alternative: pass new StandardAnalyzer(Version.LUCENE_43) instead of the IKAnalyzer.
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
        indexWriterConfig.setMaxBufferedDocs(10000);
        indexWriterConfig.setMergePolicy(mergePolicy);
        indexWriterConfig.setRAMBufferSizeMB(64);
        // Open mode: create a new index or append to an existing one.
        indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, indexWriterConfig);
    }

    /**
     * Builds the document for one file: name, canonical path and last-modified
     * time as stored string fields, plus a stored text field "content".
     *
     * <p>NOTE(review): "content" is a fixed sample string, not the file's text —
     * confirm whether real file contents should be indexed here.
     *
     * @param f file to describe
     * @return the populated document
     * @throws IOException if the canonical path cannot be resolved
     */
    private Document getDocument(File f) throws IOException {
        Document document = new Document();
        document.add(new StringField("name", f.getName(), Store.YES));
        document.add(new TextField("content", "我爱你中国", Store.YES));
        document.add(new StringField("fullpath", f.getCanonicalPath(), Store.YES));
        document.add(new StringField("updateTime", String.valueOf(f.lastModified()), Store.YES));
        return document;
    }

    /**
     * Recursively collects documents for every visible, readable file accepted by
     * the filter and appends them to {@link #documents}.
     *
     * @param files       directory listing to scan; {@code null} is treated as empty
     * @param filesFilter filter deciding which files are indexed; when {@code null}
     *                    no file is accepted
     * @return the {@link #documents} list
     * @throws IOException if a file's canonical path cannot be resolved
     */
    private List<Document> getDocuments(File[] files, FileFilter filesFilter) throws IOException {
        // File.listFiles() returns null for unreadable or vanished directories.
        if (files == null) {
            return documents;
        }
        for (File f : files) {
            if (f.isDirectory()) {
                getDocuments(f.listFiles(), filesFilter);
            } else if (!f.isHidden() && f.canRead() && filesFilter != null && filesFilter.accept(f)) {
                documents.add(getDocument(f));
            }
        }
        return documents;
    }

    /**
     * Collects the documents for {@code files} and adds them to the index.
     *
     * @param files       directory listing to index
     * @param filesFilter filter deciding which files are indexed
     * @throws IOException if collecting or writing the documents fails
     */
    private void indexFile(File[] files, FileFilter filesFilter) throws IOException {
        // Start from an empty batch so repeated calls do not re-add earlier documents.
        documents.clear();
        writer.addDocuments(getDocuments(files, filesFilter));
    }

    /**
     * Indexes every accepted file below {@code dataDri}. I/O failures are printed
     * and not propagated.
     *
     * @param dataDri     path of the directory holding the files to index
     * @param filesFilter filter deciding which files are indexed
     */
    private void index(String dataDri, TextFilesFilter filesFilter) {
        File[] files = new File(dataDri).listFiles();
        if (files == null) {
            // Not a directory, or it could not be listed: nothing to index.
            System.err.println("Cannot list data directory: " + dataDri);
            return;
        }
        try {
            indexFile(files, filesFilter);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Filter that accepts only files whose name ends with ".txt" (case-insensitive).
     *
     * @author ANWJ
     */
    private static class TextFilesFilter extends FileFilter {

        @Override
        public boolean accept(File f) {
            return f.getName().toLowerCase().endsWith(".txt");
        }

        @Override
        public String getDescription() {
            return null;
        }

    }

}


2.检索索引实例代码

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Minimal Lucene 4.3 search example: parses a query against the "content"
 * field with an {@link IKAnalyzer} and prints the stored content of the top hits.
 */
public class Searcher {

    /**
     * Searches the index in {@code indexDir} for {@code key} and prints the
     * "content" field of up to 10 matching documents.
     *
     * @param indexDir path of the index directory
     * @param key      query string, parsed with the classic query parser
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code key} is not a valid query
     */
    public static void search(String indexDir, String key) throws IOException, ParseException {

        Directory directory = FSDirectory.open(new File(indexDir));
        try {
            SearcherManager sm = new SearcherManager(directory, new SearcherFactory());
            try {
                IndexSearcher searcher = sm.acquire();
                try {
                    Analyzer analyzer = new IKAnalyzer();
                    QueryParser parser = new QueryParser(Version.LUCENE_43, "content", analyzer);

                    Query query = parser.parse(key);

                    TopDocs hits = searcher.search(query, 10);
                    for (ScoreDoc doc : hits.scoreDocs) {
                        Document document = searcher.doc(doc.doc);
                        System.out.println(document.get("content"));
                    }
                } finally {
                    // Every acquire() must be paired with a release().
                    sm.release(searcher);
                }
            } finally {
                sm.close();
            }
        } finally {
            directory.close();
        }

    }

    /**
     * Runs a sample search against the example index.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String indexDir = "E:/lucene/index";
        String key = "中国";
        try {
            search(indexDir, key);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: