您的位置:首页 > 其它

全文检索引擎工具包Lucene初体验

2008-01-28 17:47 267 查看
Lucene是Apache软件基金会Jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,并预留了很多词法分析器接口。

下载地址:http://lucene.apache.org/ (本文使用最新的Lucene 2.3)

A.创建索引文件


package demo;




import java.io.File;




import org.apache.lucene.analysis.standard.StandardAnalyzer;


import org.apache.lucene.index.IndexWriter;






public class CreateDataBase ...{






public static void main(String[] args) ...{




CreateDataBase temp = new CreateDataBase();




if (temp.createDataBase("C:/temp") == 1) ...{


System.out.println("db init succ");


}


}






public CreateDataBase() ...{


}






public int createDataBase(File file) ...{


int returnValue = 0;




if (!file.isDirectory()) ...{


file.mkdirs();


}




try ...{


IndexWriter indexWriter = new IndexWriter(file,


new StandardAnalyzer(), true);


indexWriter.close();


returnValue = 1;


}






catch (Exception ex) ...{


ex.printStackTrace();


}


return returnValue;


}






public int createDataBase(String file) ...{


return this.createDataBase(new File(file));


}




}

B.添加记录:

1.先在C:/temp目录下新建两个文本文件,文件名分别为ugg1.txt和ugg2.txt。内容随意,譬如:
ugg1.txt:
good
book
luky

ugg2.txt:
look
book
meet

并运行如下代码:


package demo;




import java.io.File;


import java.io.FileReader;




import org.apache.lucene.analysis.standard.StandardAnalyzer;


import org.apache.lucene.document.Document;


import org.apache.lucene.document.Field;


import org.apache.lucene.index.IndexWriter;






public class InsertRecords ...{






public static void main(String[] args) ...{




InsertRecords temp = new InsertRecords();


String dbpath = "C:/temp";


// holen1.txt中包含关键字"holen"和"java"




if (temp.insertRecords(dbpath, "C:/temp/ugg1.txt") == 1) ...{


System.out.println("add file1 succ");


}




// holen2.txt中包含关键字"holen"和"chen"




if (temp.insertRecords(dbpath, "C:/temp/ugg2.txt") == 1) ...{


System.out.println("add file2 succ");


}


}






public InsertRecords() ...{


}






public int insertRecords(String dbpath, File file) ...{


int returnValue = 0;




try ...{


IndexWriter indexWriter = new IndexWriter(dbpath,


new StandardAnalyzer(), false);


this.addFiles(indexWriter, file);


returnValue = 1;






} catch (Exception ex) ...{


ex.printStackTrace();


}


return returnValue;


}






public int insertRecords(String dbpath, String file) ...{


return this.insertRecords(dbpath, new File(file));


}






public void addFiles(IndexWriter indexWriter, File file) ...{


Document doc = new Document();




try ...{


doc.add(new Field("filename", file.getName(), Field.Store.YES,


Field.Index.UN_TOKENIZED));


// 以下两句只能取一句,前者是索引不存储,后者是索引且存储


doc.add(new Field("contents", new FileReader(file)));


indexWriter.addDocument(doc);


indexWriter.close();




} catch (Exception ex) ...{


ex.printStackTrace();


}


}


}

C.进行查询


package demo;




import java.util.ArrayList;




import org.apache.lucene.analysis.standard.StandardAnalyzer;


import org.apache.lucene.document.Document;


import org.apache.lucene.queryParser.QueryParser;


import org.apache.lucene.search.Hits;


import org.apache.lucene.search.IndexSearcher;


import org.apache.lucene.search.Query;


import org.apache.lucene.search.Searcher;






public class QueryRecords ...{






public QueryRecords() ...{


}




public ArrayList queryRecords(String searchkey, String dbpath,




String searchfield) ...{


ArrayList list = null;




try ...{


Searcher searcher = new IndexSearcher(dbpath);


QueryParser parser = new QueryParser(searchfield,


new StandardAnalyzer());


Query query = parser.parse(searchkey);


Hits hits = searcher.search(query);




if (hits != null) ...{


list = new ArrayList();


int temp_hitslength = hits.length();


Document doc = null;




for (int i = 0; i < temp_hitslength; i++) ...{


doc = hits.doc(i);


list.add(doc.get("filename"));


}


}




} catch (Exception ex) ...{


ex.printStackTrace();


}


return list;


}






public static void main(String[] args) ...{




QueryRecords temp = new QueryRecords();


ArrayList list = null;


list = temp.queryRecords("look", "c:/temp", "contents");




for (int i = 0; i < list.size(); i++) ...{


System.out.println((String) list.get(i));


}


}




}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: