您的位置:首页 > 编程语言 > Java开发

基于 Java 的全文检索接口 Lucene 2.4 测试(一)

2009-01-01 20:08 465 查看
Indexer.java

package lucene.main.test;

import java.io.File;

import java.io.FileReader;

import java.io.IOException;

import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriter.MaxFieldLength;

/**
 * Command-line demo that builds a Lucene index over every {@code .txt} file
 * found beneath a data directory.
 *
 * <p>Each file becomes one document with two fields: {@code contents}
 * (tokenized from the file body) and {@code filename} (the stored canonical path).
 *
 * @author rush
 */
public class Indexer {

    /**
     * Indexes the hard-coded data directory into the hard-coded index directory
     * and prints how many documents were indexed and how long it took.
     *
     * @param args ignored
     * @throws Exception if the data directory is invalid or indexing fails
     */
    public static void main(String[] args) throws Exception {
        File indexDir = new File("D://lucene//temp//Text_Index_Path"); // where the index is written
        File dataDir = new File("D://lucene//temp//TextPath//"); // directory tree to be indexed

        long start = System.currentTimeMillis();
        int numIndexed = index(indexDir, dataDir);
        long end = System.currentTimeMillis();

        System.out.println("Indexing " + numIndexed + " file took " + (end - start) + " milliseconds");
        System.out.println(numIndexed);
    }

    /**
     * Creates a new index in {@code indexDir} (any existing index there is
     * overwritten, because the writer is opened with {@code create = true})
     * from the text files under {@code dataDir}.
     *
     * @param indexDir directory that receives the index files
     * @param dataDir  root of the directory tree to index
     * @return the writer's {@code maxDoc()} value once all files have been added
     * @throws IOException if {@code dataDir} is missing or not a directory, or
     *                     if reading a file or writing the index fails
     */
    private static int index(File indexDir, File dataDir) throws IOException {
        if (!dataDir.exists() || !dataDir.isDirectory()) {
            throw new IOException(dataDir + " does not exist or is not a directory");
        }

        IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(), true, MaxFieldLength.LIMITED);
        try {
            // Compound-file format is the default; false writes a multi-file index instead.
            writer.setUseCompoundFile(false);
            indexDirectory(writer, dataDir);
            int numIndexed = writer.maxDoc(); // document count, read before optimizing
            writer.optimize();
            return numIndexed;
        } finally {
            // Always close the writer so a failure part-way through does not
            // leave the index write lock held for the next run.
            writer.close();
        }
    }

    /**
     * Recursively walks {@code dir}, indexing every file whose name ends in
     * {@code .txt} and descending into every sub-directory.
     *
     * @param writer open writer that receives the documents
     * @param dir    directory to scan
     * @throws IOException if a file cannot be indexed
     */
    private static void indexDirectory(IndexWriter writer, File dir) throws IOException {
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot
            // be read; skip it, the same way unreadable files are skipped.
            return;
        }
        for (int i = 0; i < files.length; i++) {
            File file = files[i];
            if (file.isDirectory()) {
                indexDirectory(writer, file);
            } else if (file.getName().endsWith(".txt")) {
                indexFile(writer, file);
            }
        }
    }

    /**
     * Adds one file to the index as a document. Hidden, unreadable or missing
     * files are skipped silently.
     *
     * @param writer open writer that receives the document
     * @param file   file to index
     * @throws IOException if the file cannot be opened or the document cannot be added
     */
    private static void indexFile(IndexWriter writer, File file) throws IOException {
        if (file.isHidden() || !file.canRead() || !file.exists()) {
            return;
        }

        // Canonical form of the path; resolved once and reused for logging and storage.
        String canonicalPath = file.getCanonicalPath();
        System.out.println("Indexing " + canonicalPath);

        FileReader reader = new FileReader(file);
        try {
            Document doc = new Document();
            doc.add(new Field("contents", reader));
            doc.add(new Field("filename", canonicalPath, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
            writer.addDocument(doc);
        } finally {
            // The reader is only needed until addDocument() returns; closing it
            // here releases the file handle even when indexing this file fails.
            reader.close();
        }
    }
}

Searcher.java

package lucene.main.test;

import java.io.File;

import java.io.IOException;

import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.queryParser.ParseException;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopDocCollector;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

/**
 * Command-line demo that runs a keyword query against the index and prints
 * the stored {@code filename} of every matching document.
 *
 * @author rush
 */
public class Searcher {

    /**
     * Searches the hard-coded index directory for the hard-coded keyword.
     *
     * @param args ignored
     * @throws Exception if the index directory is invalid, the query cannot be
     *                   parsed, or the index cannot be read
     */
    public static void main(String[] args) throws Exception {
        File indexDir = new File("D://lucene//temp//Text_Index_Path");
        String keywords = "love";

        if (!indexDir.exists() || !indexDir.isDirectory()) {
            throw new IOException(indexDir + " does not exist or is not a directory.");
        }
        search(indexDir, keywords);
    }

    /**
     * Parses {@code keywords} as a query on the {@code contents} field, collects
     * up to 100 top-scoring hits and prints each hit's document id and stored
     * {@code filename}, preceded by a summary line with the hit count and the
     * elapsed search time.
     *
     * @param indexDir directory holding the index
     * @param keywords query string in query-parser syntax
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code keywords} is not a valid query
     */
    private static void search(File indexDir, String keywords) throws IOException, ParseException {
        Directory fsDir = FSDirectory.getDirectory(indexDir); // open the index directory
        try {
            IndexSearcher searcher = new IndexSearcher(fsDir);
            try {
                // Parser bound to the default field and the analyzer used for the query text.
                QueryParser qp = new QueryParser("contents", new StandardAnalyzer());
                Query query = qp.parse(keywords);

                // Upper bound on the number of hits collected.
                int hitsPerPage = 100;
                TopDocCollector collector = new TopDocCollector(hitsPerPage);

                long start = System.currentTimeMillis();
                searcher.search(query, collector);
                ScoreDoc[] hits = collector.topDocs().scoreDocs;
                long end = System.currentTimeMillis();

                System.out.println("Found " + hits.length + " documents in " + (end - start) + " milliseconds that matched query '" + keywords + "':");
                for (int i = 0; i < hits.length; i++) {
                    int docId = hits[i].doc;
                    Document doc = searcher.doc(docId);
                    System.out.println(docId + ":" + doc.get("filename"));
                }
            } finally {
                // Release the searcher's resources even if parsing or searching fails.
                searcher.close();
            }
        } finally {
            fsDir.close();
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: