您的位置:首页 > 产品设计 > UI/UE

Lucene小练八(实现了索引和搜索)

2012-09-12 15:12 351 查看
// Main class (save as index.java): builds, maintains and searches the Lucene index

package Java.se.lucene;

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Small Lucene 3.6 demo: builds an on-disk index from a fixed in-memory data
 * set and offers helpers to inspect, delete, restore, merge, update and search
 * it.
 *
 * <p>All I/O failures are reported with {@code printStackTrace()} and then
 * ignored, so every public method is best-effort and never throws a checked
 * exception.
 */
public class index {
    /** Location of the on-disk index. */
    private static final String INDEX_PATH = "f:/lucene/Index04";

    private String[] ids = {"1", "2", "3", "4", "5", "6"};
    private String[] emails = {"aa@aa.com", "bb@bb.com",
            "cc@cc.com", "dd@dd.com", "ee@ee.com", "ff@ff.com"};
    private String[] contents = {"i like  gdsfgfds", "i like fsdfs", "i like fdsfsd",
            "i like fdsfsd", "i like like fdfs", "i like like like fsefsdfg"};
    private int[] attachs = {1, 2, 3, 4, 5, 6};
    private String[] names = {"liwu", "zhangsan", "xiaoqinag", "laona",
            "dabao", "lisi"};
    private Directory directory = null;
    private IndexWriter writer = null;
    private Date[] dates = null;
    /** Document boost per e-mail domain; unknown domains get 0.5. */
    private Map<String, Float> scores = new HashMap<String, Float>();
    /** Reader shared by {@link #getSearcher()} so repeated searches can reuse it. */
    private static IndexReader reader = null;

    /**
     * Prepares the sample dates and boosts, opens the index directory on disk
     * and (re)builds the index.
     */
    public index() {
        setDate();
        scores.put("aa.com", 2.0f);
        scores.put("bb.com", 1.0f);
        scores.put("cc.com", 3.0f);
        scores.put("dd.com", 4.0f);
        scores.put("ee.com", 5.0f);
        scores.put("ff.com", 6.0f);
        try {
            // The directory must exist before Index() creates a writer on it.
            // (To index in memory instead, use: directory = new RAMDirectory();)
            directory = FSDirectory.open(new File(INDEX_PATH));
            Index();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Rebuilds the index from scratch: removes every existing document, then
     * adds one document per sample record with id, email, content, name, a
     * numeric "attach" field and a numeric "date" field (epoch milliseconds).
     * Each document is boosted according to its e-mail domain.
     */
    public void Index() {
        try {
            writer = openWriter();
            writer.deleteAll(); // start from an empty index
            for (int i = 0; i < ids.length; i++) {
                Document document = new Document();
                document.add(new Field("id", ids[i],
                        Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                document.add(new Field("email", emails[i],
                        Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new Field("content", contents[i],
                        Field.Store.YES, Field.Index.ANALYZED));
                document.add(new Field("name", names[i],
                        Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                // Numeric field for the attachment count.
                document.add(new NumericField("attach", Field.Store.YES, true)
                        .setIntValue(attachs[i]));
                // Dates are indexed as their millisecond timestamp.
                document.add(new NumericField("date", Field.Store.YES, true)
                        .setLongValue(dates[i].getTime()));

                String domain = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                System.out.println(domain);
                Float boost = scores.get(domain);
                document.setBoost(boost != null ? boost.floatValue() : 0.5f);
                writer.addDocument(document);
            }
            // One commit for the whole batch instead of one per document.
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /**
     * Prints the number of live documents, the max document number and the
     * number of deleted documents currently in the index.
     */
    public void query() {
        IndexReader statsReader = null;
        try {
            statsReader = IndexReader.open(directory);
            System.out.println("numdocs:" + statsReader.numDocs());
            System.out.println("maxDocs:" + statsReader.maxDoc());
            System.out.println("detelemaxDocs:" + statsReader.numDeletedDocs());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(statsReader);
        }
    }

    /**
     * Deletes the document whose id is "1" through an IndexWriter.
     * Per the original author's note, the document is not physically removed
     * yet and can still be restored (see {@link #undelete()}).
     */
    public void delete01() {
        try {
            writer = openWriter();
            writer.deleteDocuments(new Term("id", "1"));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /** Restores all deleted documents using a writable (non read-only) reader. */
    @SuppressWarnings("deprecation")
    public void undelete() {
        IndexReader writableReader = null;
        try {
            // readOnly=false is required for undeleteAll().
            writableReader = IndexReader.open(directory, false);
            writableReader.undeleteAll();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(writableReader);
        }
    }

    /** Empties the "recycle bin": forces a merge that expunges deleted documents. */
    public void forceDelete() {
        try {
            writer = openWriter();
            writer.forceMergeDeletes();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /**
     * Forces the index to be merged down to at most 3 segments.
     * The original author marked this operation as no longer recommended.
     */
    public void forceMerge() {
        try {
            writer = openWriter();
            writer.forceMerge(3);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /**
     * Replaces the document with id "1" by a new document with id "11".
     * Lucene has no in-place update; updateDocument deletes the matching
     * documents and then adds the new one.
     */
    public void update() {
        try {
            writer = openWriter();
            // NOTE(review): the loop runs ids.length times with identical data,
            // so it adds one id "11" document per iteration (only the first
            // iteration can still match id "1"). Kept as in the original;
            // confirm whether a single update was intended.
            for (int i = 0; i < ids.length; i++) {
                Document document = new Document();
                document.add(new Field("id", "11",
                        Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                document.add(new Field("email", emails[0],
                        Field.Store.YES, Field.Index.ANALYZED));
                document.add(new Field("content", contents[0],
                        Field.Store.NO, Field.Index.NOT_ANALYZED));
                document.add(new Field("name", names[0],
                        Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.updateDocument(new Term("id", "1"), document);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /**
     * Searches the "content" field for the term "like" with a reader opened
     * just for this call, and prints up to 10 hits.
     */
    public void search01() {
        IndexReader localReader = null;
        try {
            localReader = IndexReader.open(directory);
            printHits(new IndexSearcher(localReader));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(localReader);
        }
    }

    /**
     * Same search as {@link #search01()}, but through {@link #getSearcher()},
     * which reuses the shared reader between calls.
     */
    public void search02() {
        IndexSearcher searcher = null;
        try {
            searcher = getSearcher();
            printHits(searcher);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (searcher != null) {
                try {
                    // Does not close the shared reader handed to the constructor.
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Fills {@code dates} with one sample date per record.
     *
     * @throws IllegalStateException if one of the built-in date literals
     *         cannot be parsed (programming error)
     */
    public void setDate() {
        // "MM" = month and "dd" = day of month ("mm" would be minutes, "kk" hours).
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        sdf.setLenient(false);
        String[] raw = {"2010-08-17", "2011-02-17", "2012-03-17",
                "2011-04-17", "2012-05-17", "2011-07-17"};
        dates = new Date[ids.length];
        try {
            for (int i = 0; i < dates.length && i < raw.length; i++) {
                dates[i] = sdf.parse(raw[i]);
            }
        } catch (java.text.ParseException e) {
            throw new IllegalStateException("invalid built-in sample date", e);
        }
    }

    /**
     * Returns a searcher over the shared reader. The reader is opened on first
     * use and afterwards only reopened when the index has changed, in which
     * case the stale reader is closed.
     *
     * @return a new IndexSearcher backed by the shared reader
     * @throws IllegalStateException if no reader could be opened
     */
    public IndexSearcher getSearcher() {
        try {
            if (reader == null) {
                reader = IndexReader.open(directory);
            } else {
                // Returns null when the index is unchanged since 'reader' was opened.
                IndexReader refreshed = IndexReader.openIfChanged(reader);
                if (refreshed != null) {
                    reader.close();
                    reader = refreshed;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (reader == null) {
            throw new IllegalStateException("could not open index reader for " + INDEX_PATH);
        }
        return new IndexSearcher(reader);
    }

    /** Creates a writer on {@code directory} using the standard analyzer. */
    private IndexWriter openWriter() throws IOException {
        return new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36,
                new StandardAnalyzer(Version.LUCENE_36)));
    }

    /** Closes the current writer, if any, and clears the field. */
    private void closeWriter() {
        if (writer == null) {
            return; // the writer was never opened, nothing to release
        }
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            writer = null;
        }
    }

    /** Closes the given reader if it is non-null, reporting any I/O failure. */
    private void closeQuietly(IndexReader toClose) {
        if (toClose == null) {
            return;
        }
        try {
            toClose.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Runs the content:"like" term query and prints the top 10 hits. */
    private void printHits(IndexSearcher searcher) throws IOException {
        TermQuery query = new TermQuery(new Term("content", "like"));
        TopDocs tds = searcher.search(query, 10);
        for (ScoreDoc sdc : tds.scoreDocs) {
            Document document = searcher.doc(sdc.doc);
            System.out.println("(" + sdc.doc + ")" + document.get("name") + "[" + document.get("email")
                    + "]-->" + document.get("id") + "..." + document.get("attach") + "..." + document.get("date"));
        }
    }

}
// Test class (separate file, save as Test_Index.java): JUnit tests that call each method of index

package Java.se.lucene;

import org.junit.Test;

/**
 * JUnit driver for the {@code index} demo class. Each test creates a fresh
 * {@code index} instance and invokes one of its operations; results are
 * printed to stdout rather than asserted.
 */
public class Test_Index {
    /** Builds the index. */
    @Test
    public void test_index() {
        index ind = new index();
        ind.Index();
    }

    /** Prints document counts of the index. */
    @Test
    public void test_query() {
        index ind = new index();
        ind.query();
    }

    /** Deletes a document through the writer. */
    @Test
    public void test_delete01() {
        index ind = new index();
        ind.delete01();
    }

    /** Restores deleted documents. */
    @Test
    public void test_undelete() {
        index ind = new index();
        ind.undelete();
    }

    /** Empties the recycle bin (expunges deleted documents). */
    @Test
    public void test_forceDelete() {
        index ind = new index();
        ind.forceDelete();
    }

    /** Forces a segment merge. */
    @Test
    public void test_forceMerge() {
        index ind = new index();
        ind.forceMerge();
    }

    /** Updates a document. */
    @Test
    public void test_update() {
        index ind = new index();
        ind.update();
    }

    /** Runs the search that opens its own reader. */
    @Test
    public void test_search01() {
        index ind = new index();
        ind.search01();
    }

    /**
     * Runs the search that goes through getSearcher() five times, pausing five
     * seconds between rounds so the index can be modified in the meantime.
     */
    @Test
    public void test_search02() {
        index ind = new index();
        for (int round = 0; round < 5; round++) {
            ind.search02();
            System.out.println("------------------------");
            try {
                Thread.sleep(5000);
            } catch (InterruptedException e) {
                // Restore the interrupt flag and stop instead of swallowing it.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /*
     * Disabled helper kept from the original: prints every document to check
     * that the index was built correctly.
     *
    public void check() throws IOException {
        Directory directory = FSDirectory.open(new File("f:/lucene/Index04/"));
        IndexReader reader = IndexReader.open(directory);
        for (int i = 0; i < reader.numDocs(); i++) {
            System.out.println(reader.document(i));
        }
        reader.close();
    }
    public static void main(String[] args) throws IOException {
        new index().check();
    }*/
}

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息