Lucene小练八(实现了索引和搜索)
2012-09-12 15:12
351 查看
// Main class (index.java). The test class further below has its own package
// declaration and is public, so it belongs in a separate file (Test_Index.java).
package Java.se.lucene;

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Lucene 3.6 exercise: builds a small on-disk index from hard-coded sample
 * data and demonstrates inspecting, deleting, restoring, merging, updating
 * and searching it.
 *
 * <p>All I/O failures are reported with {@code printStackTrace()}, as in the
 * rest of this exercise; no method declares a checked exception.
 */
public class index {

    // Sample data; element i of every array describes document i.
    private final String[] ids = {"1", "2", "3", "4", "5", "6"};
    private final String[] emails = {"aa@aa.com", "bb@bb.com",
            "cc@cc.com", "dd@dd.com", "ee@ee.com", "ff@ff.com"};
    private final String[] contents = {"i like gdsfgfds", "i like fsdfs", "i like fdsfsd",
            "i like fdsfsd", "i like like fdfs", "i like like like fsefsdfg"};
    private final int[] attachs = {1, 2, 3, 4, 5, 6};
    private final String[] names = {"liwu", "zhangsan", "xiaoqinag", "laona",
            "dabao", "lisi"};

    private Directory directory = null;
    private IndexWriter writer = null;
    private Date[] dates = null;

    // Document boost per e-mail domain.
    private final Map<String, Float> scores = new HashMap<String, Float>();

    // Reader shared by every searcher handed out by getSearcher().
    private static IndexReader reader = null;

    /**
     * Prepares the sample dates and boosts, opens the on-disk index directory
     * and (re)builds the index.
     */
    public index() {
        setDate(); // create the dates
        try {
            scores.put("aa.com", 2.0f);
            scores.put("bb.com", 1.0f);
            scores.put("cc.com", 3.0f);
            scores.put("dd.com", 4.0f);
            scores.put("ee.com", 5.0f);
            scores.put("ff.com", 6.0f);

            // In-memory alternative:
            // directory = new RAMDirectory();

            // Open the Directory from disk. This must happen BEFORE Index():
            // the original called Index() while directory was still null, so
            // the IndexWriter constructor failed and the assignment below was
            // never reached.
            directory = FSDirectory.open(new File("f:/lucene/Index04"));

            // NOTE(review): rebuilding here wipes the index on every
            // construction, including deletions made through an earlier
            // instance. Drop this call if the index should persist between
            // instances.
            Index();

            // reader = IndexReader.open(directory);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Rebuilds the index: removes every existing document, then adds one
     * document per sample entry with a boost chosen by e-mail domain.
     */
    public void Index() {
        try {
            writer = openWriter();
            writer.deleteAll(); // start from an empty index
            for (int i = 0; i < ids.length; i++) {
                Document document = new Document();
                document.add(new Field("id", ids[i],
                        Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                document.add(new Field("email", emails[i],
                        Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new Field("content", contents[i],
                        Field.Store.YES, Field.Index.ANALYZED));
                document.add(new Field("name", names[i],
                        Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                // Index the number.
                document.add(new NumericField("attach", Field.Store.YES, true)
                        .setIntValue(attachs[i]));
                // Index the date as epoch milliseconds (hence getTime()).
                document.add(new NumericField("date", Field.Store.YES, true)
                        .setLongValue(dates[i].getTime()));

                String domain = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                System.out.println(domain);
                Float boost = scores.get(domain);
                document.setBoost(boost != null ? boost.floatValue() : 0.5f);

                writer.addDocument(document);
            }
            // One commit for the whole batch instead of one per document.
            writer.commit();
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException.
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /**
     * Prints the live document count, {@code maxDoc()} and the number of
     * deleted documents.
     */
    public void query() {
        IndexReader statsReader = null;
        try {
            statsReader = IndexReader.open(directory);
            System.out.println("numdocs:" + statsReader.numDocs()); // live documents
            System.out.println("maxDocs:" + statsReader.maxDoc()); // includes deleted documents
            System.out.println("detelemaxDocs:" + statsReader.numDeletedDocs());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeReader(statsReader);
        }
    }

    /**
     * Deletes the document whose id is "1" through the writer. The document
     * is only marked as deleted, so it can still be restored with
     * {@link #undelete()} until the deletions are merged away.
     */
    public void delete01() {
        try {
            writer = openWriter();
            writer.deleteDocuments(new Term("id", "1"));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /**
     * Restores every document that is marked as deleted, using a writable
     * reader.
     */
    @SuppressWarnings("deprecation")
    public void undelete() {
        IndexReader writableReader = null;
        try {
            // readOnly = false: undeleteAll() modifies the index.
            writableReader = IndexReader.open(directory, false);
            writableReader.undeleteAll();
        } catch (IOException e) {
            // Also covers StaleReaderException, CorruptIndexException and
            // LockObtainFailedException.
            e.printStackTrace();
        } finally {
            closeReader(writableReader);
        }
    }

    /**
     * Empties the "recycle bin": forces the merges that expunge documents
     * marked as deleted.
     */
    public void forceDelete() {
        try {
            writer = openWriter();
            writer.forceMergeDeletes();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /**
     * Merges the index down to at most 3 segments. The original author notes
     * that this operation is no longer recommended.
     */
    public void forceMerge() {
        try {
            writer = openWriter();
            writer.forceMerge(3);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /**
     * Replaces the document whose id is "1" with a new document whose id is
     * "11". Lucene has no in-place update: {@code updateDocument} deletes the
     * matching documents and then adds the new one.
     */
    public void update() {
        try {
            writer = openWriter();
            // A single call is enough. The original repeated this identical
            // call ids.length times, which added that many copies of the
            // replacement document.
            Document document = new Document();
            document.add(new Field("id", "11",
                    Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
            document.add(new Field("email", emails[0],
                    Field.Store.YES, Field.Index.ANALYZED));
            document.add(new Field("content", contents[0],
                    Field.Store.NO, Field.Index.NOT_ANALYZED));
            document.add(new Field("name", names[0],
                    Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.updateDocument(new Term("id", "1"), document);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /**
     * Searches the "content" field for the term "like" with a reader opened
     * just for this call, and prints up to 10 hits.
     */
    public void search01() {
        IndexReader searchReader = null;
        try {
            searchReader = IndexReader.open(directory);
            printHits(new IndexSearcher(searchReader));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeReader(searchReader);
        }
    }

    /**
     * Same search as {@link #search01()}, but through the shared reader
     * managed by {@link #getSearcher()}, so repeated calls do not reopen the
     * index unless it changed.
     */
    public void search02() {
        IndexSearcher searcher = null;
        try {
            searcher = getSearcher();
            printHits(searcher);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (searcher != null) {
                try {
                    // Closes the searcher only; the shared reader is kept
                    // open for later calls.
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Creates the sample dates, one per document.
     *
     * @throws IllegalStateException if one of the hard-coded dates cannot be
     *         parsed
     */
    public void setDate() {
        // "MM" = month and "dd" = day of month. The original pattern
        // "yyyy-mm-kk" parsed minutes and hour-of-day instead, so every date
        // ended up on 1 January of its year.
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        String[] rawDates = {"2010-08-17", "2011-02-17", "2012-03-17",
                "2011-04-17", "2012-05-17", "2011-07-17"};
        dates = new Date[ids.length];
        try {
            for (int i = 0; i < rawDates.length && i < dates.length; i++) {
                dates[i] = sdf.parse(rawDates[i]);
            }
        } catch (java.text.ParseException e) {
            // Fail fast: a null entry would only crash later during indexing.
            throw new IllegalStateException("Invalid sample date", e);
        }
    }

    /**
     * Returns a searcher over the shared reader. The reader is opened on
     * first use and afterwards reopened only when the index has changed.
     *
     * @return a new searcher backed by the shared reader
     * @throws IllegalStateException if no reader could be opened
     */
    public IndexSearcher getSearcher() {
        try {
            if (reader == null) {
                reader = IndexReader.open(directory);
            } else {
                // openIfChanged returns null when the index is unchanged.
                IndexReader refreshed = IndexReader.openIfChanged(reader);
                if (refreshed != null) {
                    reader.close();
                    reader = refreshed;
                }
            }
        } catch (IOException e) {
            // If only the refresh failed, keep using the previous reader.
            e.printStackTrace();
        }
        if (reader == null) {
            throw new IllegalStateException("Could not open an IndexReader on " + directory);
        }
        return new IndexSearcher(reader);
    }

    /** Opens a writer on the index directory with the standard analyzer. */
    private IndexWriter openWriter() throws IOException {
        return new IndexWriter(directory,
                new IndexWriterConfig(Version.LUCENE_36,
                        new StandardAnalyzer(Version.LUCENE_36)));
    }

    /** Closes the current writer, if any, and clears the field. */
    private void closeWriter() {
        if (writer == null) {
            return; // the writer was never opened; nothing to close
        }
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            writer = null;
        }
    }

    /** Closes the given reader if it is not null, reporting any failure. */
    private void closeReader(IndexReader toClose) {
        if (toClose == null) {
            return;
        }
        try {
            toClose.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Runs the content:like term query and prints the top 10 hits. */
    private void printHits(IndexSearcher searcher) throws IOException {
        TermQuery query = new TermQuery(new Term("content", "like"));
        TopDocs hits = searcher.search(query, 10);
        for (ScoreDoc hit : hits.scoreDocs) {
            Document document = searcher.doc(hit.doc);
            System.out.println("(" + hit.doc + ")" + document.get("name")
                    + "[" + document.get("email") + "]-->" + document.get("id")
                    + "..." + document.get("attach") + "..." + document.get("date"));
        }
    }
}

// Test class (Test_Index.java)
package Java.se.lucene;

import org.junit.Test;

public class Test_Index {

    // Build the index.
    @Test
    public void test_index() {
        index ind = new index();
        ind.Index();
    }

    // Print document statistics.
    @Test
    public void test_query() {
        index ind = new index();
        ind.query();
    }

    // Delete a document (recoverable).
    @Test
    public void test_delete01() {
        index ind = new index();
        ind.delete01();
    }

    // Restore deleted documents.
    @Test
    public void test_undelete() {
        index ind = new index();
        ind.undelete();
    }

    // Empty the recycle bin.
    @Test
    public void test_forceDelete() {
        index ind = new index();
        ind.forceDelete();
    }

    // Merge segments.
    @Test
    public void test_forceMerge() {
        index ind = new index();
        ind.forceMerge();
    }

    // Update a document.
    @Test
    public void test_update() {
        index ind = new index();
        ind.update();
    }

    // Search with a per-call reader.
    @Test
    public void test_search01() {
        index ind = new index();
        ind.search01();
    }

    // Search repeatedly through the shared reader.
    @Test
    public void test_search02() {
        index ind = new index();
        for (int i = 0; i < 5; i++) {
            ind.search02();
            System.out.println("------------------------");
            try {
                Thread.sleep(5000);
            } catch (InterruptedException e) {
                e.printStackTrace();
                // Restore the interrupt flag and stop waiting.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /*
    public void check() throws IOException {
        // Check that the index was built correctly (prints every document).
        Directory directory = FSDirectory.open(new File("f:/lucene/Index04/")); // open the on-disk directory
        IndexReader reader = IndexReader.open(directory);
        for (int i = 0; i < reader.numDocs(); i++) {
            System.out.println(reader.document(i));
        }
        reader.close();
    }

    public static void main(String[] args) throws IOException {
        new index().check();
    }
    */
}
相关文章推荐
- 使用lucene3.6创建索引和实现简单搜索
- lucene5--增量索引(Zoie)(近实时搜索的实现)
- 使用lucene3.6创建索引和实现简单搜索
- c#使用Lucene.net创建索引,实现搜索的代码示例
- lucene全文搜索之四:创建索引搜索器、6种文档搜索器实现以及搜索结果分析(结合IKAnalyzer分词器的搜索器)基于lucene5.5.3
- lucene全文搜索之四:创建索引搜索器、6种文档搜索器实现以及搜索结果分析(结合IKAnalyzer分词器的搜索器)基于lucene5.5.3
- 使用Lucene.Net管理索引实现搜索
- Lucene.net 实现近实时搜索(NRT)和增量索引
- 【Lucene4.8教程之一】使用Lucene4.8进行索引及搜索的基本操作
- Lucene 生成索引以及搜索的完整例子
- lucene.net 如何实现全文的搜索
- lucene.net搜索--------索引详解
- 给你的网站加上站内搜索---Spring+Hibernate基于Compass(基于Lucene)实现
- Lucene如何实现增量索引?
- Lucene中最简单的索引和搜索示例
- 【Lucene3.6.2入门系列】第14节_SolrJ操作索引和搜索文档以及整合中文分词
- 21、学习Lucene3.5索引之近实时搜索常用工具类
- Lucene 实现txt文件的构建索引与查询
- 【Lucene4.8教程之一】使用Lucene4.8进行索引及搜索的基本操作
- Lucene 入门之创建索引和搜索