基于 Java 的全文检索接口 Lucene 2.4 测试(一)
2009-01-01 20:08
465 查看
Indexer.java
package lucene.main.test;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
/**
 * Demo program that builds a Lucene index from the {@code .txt} files found
 * (recursively) under a data directory.
 *
 * @author rush
 */
public class Indexer{
    /**
     * Indexes the hard-coded data directory into the hard-coded index directory and
     * prints the number of indexed documents and the elapsed time.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the data directory is invalid or indexing fails
     */
    public static void main(String[] args) throws Exception{
        File indexDir=new File("D://lucene//temp//Text_Index_Path");// directory that stores the index
        File dataDir=new File("D://lucene//temp//TextPath//");// directory whose files get indexed
        long start=new Date().getTime();
        int numIndexed=index(indexDir,dataDir);
        long end=new Date().getTime();
        System.out.println("Indexing "+numIndexed+" file took "+(end-start)+" milliseconds");
        System.out.println(numIndexed);
    }

    /**
     * Creates an index in {@code indexDir} covering every {@code .txt} file below {@code dataDir}.
     *
     * @param indexDir directory the index is written to
     * @param dataDir  root directory of the files to index
     * @return the document count reported by the writer after all files were added
     * @throws IOException if {@code dataDir} does not exist or is not a directory,
     *                     or if reading files / writing the index fails
     */
    private static int index(File indexDir, File dataDir) throws IOException {
        if(!dataDir.exists()||!dataDir.isDirectory())
        {
            throw new IOException(dataDir+" does not exist or is not a directory");
        }
        IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true,MaxFieldLength.LIMITED);
        try{
            // Compound-file format is the default; false writes a multi-file index instead.
            writer.setUseCompoundFile(false);
            indexDirectory(writer,dataDir);
            int numIndexed=writer.maxDoc();// document count, read before optimizing
            writer.optimize();// merge segments
            return numIndexed;
        }finally{
            // Always close the writer so it is released even when indexing fails.
            writer.close();
        }
    }

    /**
     * Recursively walks {@code dir}, indexing every file whose name ends with {@code .txt}.
     *
     * @param writer writer the documents are added to
     * @param dir    directory to scan
     * @throws IOException if indexing one of the files fails
     */
    private static void indexDirectory(IndexWriter writer, File dir) throws IOException {
        // listFiles() returns the files and directories contained in dir,
        // or null when dir is not a directory or cannot be read.
        File[] files=dir.listFiles();
        if(files==null)
        {
            System.err.println("Skipping unreadable directory "+dir);
            return;
        }
        for(int i=0;i<files.length;i++)
        {
            File file=files[i];
            if(file.isDirectory())
            {
                indexDirectory(writer,file);
            }else if(file.getName().endsWith(".txt")){
                indexFile(writer,file);
            }
        }
    }

    /**
     * Adds one file to the index as a document with a tokenized {@code contents} field
     * (read from the file) and a stored {@code filename} field (the canonical path).
     * Hidden, unreadable or missing files are silently skipped.
     *
     * @param writer writer the document is added to
     * @param file   file to index
     * @throws IOException if the file cannot be read or the document cannot be added
     */
    private static void indexFile(IndexWriter writer, File file) throws IOException {
        if(file.isHidden()||!file.canRead()||!file.exists())
        {
            return;
        }
        // getCanonicalPath() returns the canonical form of the file's path name.
        System.out.println("Indexing "+file.getCanonicalPath());
        // NOTE(review): FileReader decodes with the platform default charset.
        FileReader reader=new FileReader(file);
        try{
            Document doc=new Document();
            Field contents=new Field("contents",reader);
            Field filename=new Field("filename",file.getCanonicalPath(),Field.Store.YES,Field.Index.ANALYZED,Field.TermVector.NO);
            doc.add(contents);
            doc.add(filename);
            // The reader is consumed while the document is being added.
            writer.addDocument(doc);
        }finally{
            // Release the file handle whether or not addDocument succeeded.
            reader.close();
        }
    }
}
Searcher.java
package lucene.main.test;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Demo program that searches the {@code contents} field of a Lucene index for a keyword
 * and prints the stored {@code filename} of every hit.
 *
 * @author rush
 */
public class Searcher {
    /**
     * Searches the hard-coded index directory for the hard-coded keyword.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the index directory is invalid or the search fails
     */
    public static void main(String[] args) throws Exception{
        File indexDir=new File("D://lucene//temp//Text_Index_Path");
        String keywords="love";
        if(!indexDir.exists()||!indexDir.isDirectory())
        {
            throw new Exception(indexDir+" does not exist or is not a directory.");
        }
        search(indexDir,keywords);
    }

    /**
     * Runs {@code keywords} as a query against the index in {@code indexDir} and prints
     * the number of hits, the elapsed time, and each hit's document id and file name.
     *
     * @param indexDir directory containing the index
     * @param keywords query string, parsed with the query parser against the {@code contents} field
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code keywords} is not a valid query
     */
    private static void search(File indexDir, String keywords) throws IOException, ParseException {
        Directory fsDir=FSDirectory.getDirectory(indexDir);// open the index directory
        try{
            IndexSearcher searcher=new IndexSearcher(fsDir);// create the index searcher
            try{
                // Create the query parser, naming the default field and the analyzer.
                QueryParser qp=new QueryParser("contents",new StandardAnalyzer());
                // Parse the search keywords into a query.
                Query query=qp.parse(keywords);
                // Maximum number of results to collect.
                int hitsPerPage=100;
                TopDocCollector collector=new TopDocCollector(hitsPerPage);
                long start=new Date().getTime();
                // Run the search; matching documents are gathered by the collector.
                searcher.search(query,collector);
                ScoreDoc[] hits=collector.topDocs().scoreDocs;
                long end=new Date().getTime();
                System.out.println("Found "+hits.length+" documents in "+(end-start)+" milliseconds that matched query '"+keywords+"':");
                for(int i=0;i<hits.length;i++){
                    int docId=hits[i].doc;
                    Document doc=searcher.doc(docId);
                    System.out.println(docId+":"+doc.get("filename"));
                }
            }finally{
                // Close the searcher even when parsing or searching fails.
                searcher.close();
            }
        }finally{
            fsDir.close();
        }
    }
}
Indexer.java
package lucene.main.test;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
/**
 * Demo program that builds a Lucene index from the {@code .txt} files found
 * (recursively) under a data directory.
 *
 * @author rush
 */
public class Indexer{
    /**
     * Indexes the hard-coded data directory into the hard-coded index directory and
     * prints the number of indexed documents and the elapsed time.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the data directory is invalid or indexing fails
     */
    public static void main(String[] args) throws Exception{
        File indexDir=new File("D://lucene//temp//Text_Index_Path");// directory that stores the index
        File dataDir=new File("D://lucene//temp//TextPath//");// directory whose files get indexed
        long start=new Date().getTime();
        int numIndexed=index(indexDir,dataDir);
        long end=new Date().getTime();
        System.out.println("Indexing "+numIndexed+" file took "+(end-start)+" milliseconds");
        System.out.println(numIndexed);
    }

    /**
     * Creates an index in {@code indexDir} covering every {@code .txt} file below {@code dataDir}.
     *
     * @param indexDir directory the index is written to
     * @param dataDir  root directory of the files to index
     * @return the document count reported by the writer after all files were added
     * @throws IOException if {@code dataDir} does not exist or is not a directory,
     *                     or if reading files / writing the index fails
     */
    private static int index(File indexDir, File dataDir) throws IOException {
        if(!dataDir.exists()||!dataDir.isDirectory())
        {
            throw new IOException(dataDir+" does not exist or is not a directory");
        }
        IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true,MaxFieldLength.LIMITED);
        try{
            // Compound-file format is the default; false writes a multi-file index instead.
            writer.setUseCompoundFile(false);
            indexDirectory(writer,dataDir);
            int numIndexed=writer.maxDoc();// document count, read before optimizing
            writer.optimize();// merge segments
            return numIndexed;
        }finally{
            // Always close the writer so it is released even when indexing fails.
            writer.close();
        }
    }

    /**
     * Recursively walks {@code dir}, indexing every file whose name ends with {@code .txt}.
     *
     * @param writer writer the documents are added to
     * @param dir    directory to scan
     * @throws IOException if indexing one of the files fails
     */
    private static void indexDirectory(IndexWriter writer, File dir) throws IOException {
        // listFiles() returns the files and directories contained in dir,
        // or null when dir is not a directory or cannot be read.
        File[] files=dir.listFiles();
        if(files==null)
        {
            System.err.println("Skipping unreadable directory "+dir);
            return;
        }
        for(int i=0;i<files.length;i++)
        {
            File file=files[i];
            if(file.isDirectory())
            {
                indexDirectory(writer,file);
            }else if(file.getName().endsWith(".txt")){
                indexFile(writer,file);
            }
        }
    }

    /**
     * Adds one file to the index as a document with a tokenized {@code contents} field
     * (read from the file) and a stored {@code filename} field (the canonical path).
     * Hidden, unreadable or missing files are silently skipped.
     *
     * @param writer writer the document is added to
     * @param file   file to index
     * @throws IOException if the file cannot be read or the document cannot be added
     */
    private static void indexFile(IndexWriter writer, File file) throws IOException {
        if(file.isHidden()||!file.canRead()||!file.exists())
        {
            return;
        }
        // getCanonicalPath() returns the canonical form of the file's path name.
        System.out.println("Indexing "+file.getCanonicalPath());
        // NOTE(review): FileReader decodes with the platform default charset.
        FileReader reader=new FileReader(file);
        try{
            Document doc=new Document();
            Field contents=new Field("contents",reader);
            Field filename=new Field("filename",file.getCanonicalPath(),Field.Store.YES,Field.Index.ANALYZED,Field.TermVector.NO);
            doc.add(contents);
            doc.add(filename);
            // The reader is consumed while the document is being added.
            writer.addDocument(doc);
        }finally{
            // Release the file handle whether or not addDocument succeeded.
            reader.close();
        }
    }
}
Searcher.java
package lucene.main.test;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Demo program that searches the {@code contents} field of a Lucene index for a keyword
 * and prints the stored {@code filename} of every hit.
 *
 * @author rush
 */
public class Searcher {
    /**
     * Searches the hard-coded index directory for the hard-coded keyword.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the index directory is invalid or the search fails
     */
    public static void main(String[] args) throws Exception{
        File indexDir=new File("D://lucene//temp//Text_Index_Path");
        String keywords="love";
        if(!indexDir.exists()||!indexDir.isDirectory())
        {
            throw new Exception(indexDir+" does not exist or is not a directory.");
        }
        search(indexDir,keywords);
    }

    /**
     * Runs {@code keywords} as a query against the index in {@code indexDir} and prints
     * the number of hits, the elapsed time, and each hit's document id and file name.
     *
     * @param indexDir directory containing the index
     * @param keywords query string, parsed with the query parser against the {@code contents} field
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code keywords} is not a valid query
     */
    private static void search(File indexDir, String keywords) throws IOException, ParseException {
        Directory fsDir=FSDirectory.getDirectory(indexDir);// open the index directory
        try{
            IndexSearcher searcher=new IndexSearcher(fsDir);// create the index searcher
            try{
                // Create the query parser, naming the default field and the analyzer.
                QueryParser qp=new QueryParser("contents",new StandardAnalyzer());
                // Parse the search keywords into a query.
                Query query=qp.parse(keywords);
                // Maximum number of results to collect.
                int hitsPerPage=100;
                TopDocCollector collector=new TopDocCollector(hitsPerPage);
                long start=new Date().getTime();
                // Run the search; matching documents are gathered by the collector.
                searcher.search(query,collector);
                ScoreDoc[] hits=collector.topDocs().scoreDocs;
                long end=new Date().getTime();
                System.out.println("Found "+hits.length+" documents in "+(end-start)+" milliseconds that matched query '"+keywords+"':");
                for(int i=0;i<hits.length;i++){
                    int docId=hits[i].doc;
                    Document doc=searcher.doc(docId);
                    System.out.println(docId+":"+doc.get("filename"));
                }
            }finally{
                // Close the searcher even when parsing or searching fails.
                searcher.close();
            }
        }finally{
            fsDir.close();
        }
    }
}
相关文章推荐
- Lucene:基于Java的全文检索引擎简介--车东老师
- 【转载】Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- 基于Java的全文检索引擎简介Lucene
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介
- Lucene:基于Java的全文检索引擎简介