您的位置:首页 > 其它

lucene的多种搜索

2012-09-13 17:47 441 查看
package Java.se.lucene;
//主类
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Demonstrates the basic query types of Lucene 3.6 against a small in-memory index.
 *
 * <p>The constructor builds a six-document index in a {@link RAMDirectory}. Each
 * {@code searchBy...} method builds one kind of query, runs it and prints the
 * matching documents to standard output.
 *
 * <p>Instances are not designed for concurrent use: the cached reader is
 * replaced without any synchronization.
 */
public class Search {
    // Sample data; element i of every array describes document i.
    private String[] ids = {"1", "2", "3", "4", "5", "6"};
    private String[] emails = {"aa@aa.com", "bb@bb.com",
            "cc@cc.com", "dd@dd.com", "ee@ee.com", "ff@ff.com"};
    private String[] contents = {"i like  gdsfgfds", "i like fsdfs", "i like fdsfsd",
            "i like fdsfsd", "i like like fdfs", "i like like like fsefsdfg"};
    private int[] attachs = {1, 2, 3, 4, 5, 6};
    private String[] names = {"liwu", "zhangsan", "xiaoqinag", "laona",
            "dabao", "lisi"};
    private IndexWriter writer = null;
    private Date[] dates = null;
    // Boost per e-mail domain.
    // NOTE(review): nothing in this class ever populates this map, so every
    // document currently receives the 0.5 fallback boost - confirm intent.
    private Map<String, Float> scores = new HashMap<String, Float>();
    private Directory directory = null;
    // Cached reader, reused across searches and refreshed when the index changes.
    private IndexReader reader = null;

    /** Creates the in-memory directory, prepares the sample dates and builds the index. */
    public Search() {
        directory = new RAMDirectory();
        setDate();
        index();
    }

    /**
     * (Re)builds the index: removes all existing documents and adds one document
     * per sample record. I/O failures are reported on standard error.
     */
    public void index() {
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36,
                    new StandardAnalyzer(Version.LUCENE_36)));
            writer.deleteAll(); // start from an empty index
            for (int i = 0; i < ids.length; i++) {
                Document document = new Document();
                document.add(new Field("id", ids[i],
                        Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                document.add(new Field("email", emails[i],
                        Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new Field("content", contents[i],
                        Field.Store.YES, Field.Index.ANALYZED));
                document.add(new Field("name", names[i],
                        Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                // Numbers are indexed through NumericField so that numeric range queries work.
                document.add(new NumericField("attach", Field.Store.YES, true)
                        .setIntValue(attachs[i]));
                // Dates are indexed as their epoch-millisecond value.
                document.add(new NumericField("date", Field.Store.YES, true)
                        .setLongValue(dates[i].getTime()));

                // Boost by e-mail domain (the text after the last '@').
                String domain = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                Float boost = scores.get(domain);
                document.setBoost(boost != null ? boost.floatValue() : 0.5f);

                writer.addDocument(document);
            }
            // One commit for the whole batch instead of one per document.
            writer.commit();
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException,
            // which are both subclasses of IOException.
            e.printStackTrace();
        } finally {
            // The writer is still null if its constructor threw.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Returns a searcher over an up-to-date reader. The reader is opened on first
     * use and afterwards only reopened when the index has changed.
     *
     * @return a new searcher, or {@code null} if the index could not be opened
     *         (the failure is printed to standard error)
     */
    public IndexSearcher getSearcher() {
        try {
            if (reader == null) {
                reader = IndexReader.open(directory);
            } else {
                // openIfChanged returns null when the index is unchanged; in that
                // case the current reader must stay open and keep being used.
                IndexReader refreshed = IndexReader.openIfChanged(reader);
                if (refreshed != null) {
                    reader.close();
                    reader = refreshed;
                }
            }
            return new IndexSearcher(reader);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Exact term search (1).
     *
     * @param field field to search
     * @param name  exact term to match
     * @param num   maximum number of hits to print
     */
    public void searchByTerm(String field, String name, int num) {
        printHits(new TermQuery(new Term(field, name)), num);
    }

    /**
     * Term range search (2), both bounds inclusive. Intended for textual fields;
     * use {@link #searchByNumRange} for fields indexed as numbers.
     *
     * @param field field to search
     * @param start lower bound term
     * @param end   upper bound term
     * @param num   maximum number of hits to print
     */
    public void searchByTermRange(String field, String start, String end, int num) {
        printHits(new TermRangeQuery(field, start, end, true, true), num);
    }

    /**
     * Numeric range search (3) over an int field, both bounds inclusive.
     *
     * @param field numeric field to search
     * @param start lower bound
     * @param end   upper bound
     * @param num   maximum number of hits to print
     */
    public void searchByNumRange(String field, int start, int end, int num) {
        printHits(NumericRangeQuery.newIntRange(field, start, end, true, true), num);
    }

    /**
     * Prefix search (4).
     *
     * @param field field to search
     * @param value prefix that matching terms must start with
     * @param num   maximum number of hits to print
     */
    public void searchByPrefix(String field, String value, int num) {
        printHits(new PrefixQuery(new Term(field, value)), num);
    }

    /**
     * Wildcard search (5). In the pattern, {@code ?} matches a single character
     * and {@code *} matches any number of characters.
     *
     * @param field field to search
     * @param value wildcard pattern
     * @param num   maximum number of hits to print
     */
    public void searchBywildCard(String field, String value, int num) {
        printHits(new WildcardQuery(new Term(field, value)), num);
    }

    /**
     * Boolean search (6): name must be "laona"; content containing "fdfs" is optional.
     *
     * @param num maximum number of hits to print
     */
    public void searchByBoolean(int num) {
        BooleanQuery query = new BooleanQuery();
        query.add(new TermQuery(new Term("name", "laona")), Occur.MUST);
        query.add(new TermQuery(new Term("content", "fdfs")), Occur.SHOULD);
        printHits(query, num);
    }

    /**
     * Phrase search (7): the content terms "i" and "fdsfsd" with a slop of 1.
     *
     * @param num maximum number of hits to print
     */
    public void searchByPhrase(int num) {
        PhraseQuery query = new PhraseQuery();
        query.setSlop(1);
        query.add(new Term("content", "i"));
        query.add(new Term("content", "fdsfsd"));
        printHits(query, num);
    }

    /**
     * Fuzzy search (8): names similar to "laonaa" with a minimum similarity of 0.5.
     *
     * @param num maximum number of hits to print
     */
    public void searchByFuzzy(int num) {
        printHits(new FuzzyQuery(new Term("name", "laonaa"), 0.5f), num);
    }

    /** Fills {@link #dates} with one sample date per document. */
    public void setDate() {
        // "MM" is month and "dd" is day of month; lower-case "mm" would be
        // minutes and "kk" hour of day, which silently yields wrong dates.
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        try {
            dates = new Date[ids.length];
            dates[0] = sdf.parse("2010-08-17");
            dates[1] = sdf.parse("2011-02-17");
            dates[2] = sdf.parse("2012-03-17");
            dates[3] = sdf.parse("2011-04-17");
            dates[4] = sdf.parse("2012-05-17");
            dates[5] = sdf.parse("2011-07-17");
        } catch (java.text.ParseException e) {
            e.printStackTrace();
        }
    }

    /** Runs the query, prints the total hit count and up to num matching documents. */
    private void printHits(Query query, int num) {
        IndexSearcher searcher = getSearcher();
        if (searcher == null) {
            // getSearcher() has already reported why the index could not be opened.
            return;
        }
        try {
            TopDocs tds = searcher.search(query, num);
            System.out.println("一共查询了:" + tds.totalHits);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("id") + "-->" + doc.get("name") + ".."
                        + "[" + doc.get("email") + "]" + "(" + doc.get("content") + ")"
                        + "-->" + doc.get("attach") + ".." + doc.get("date"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close the searcher even when the search itself failed.
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
// Test class
package Java.se.lucene;

import org.junit.Before;
import org.junit.Test;

/**
 * JUnit driver that exercises every query type offered by {@link Search}.
 *
 * <p>The tests make no assertions; each one prints the hits of a single kind
 * of query to standard output for manual inspection.
 */
public class Test_Search {
    /** Default maximum number of hits to print. */
    private static final int num = 10;
    private Search search = null;

    /** Builds a fresh in-memory index before every test. */
    @Before
    public void init() {
        search = new Search();
    }

    /** Exact term query on the content field. */
    @Test
    public void test_search01() {
        search.searchByTerm("content", "like", 6);
    }

    /** Term range queries on different fields. */
    @Test
    public void test_search02() {
        // content field, range a-z
        search.searchByTermRange("content", "a", "z", 10);
        // name field, range a-z (this previously repeated the content query)
        search.searchByTermRange("name", "a", "z", 10);
        // attach field, range 2-6: per the original author's note this finds
        // nothing; attach is indexed as a number, see test_search03.
        search.searchByTermRange("attach", "2", "6", 10);
    }

    /** Numeric range query: attach between 1 and 4. */
    @Test
    public void test_search03() {
        search.searchByNumRange("attach", 1, 4, num);
    }

    /** Prefix query on the content field. */
    @Test
    public void test_search04() {
        search.searchByPrefix("content", "lgfdfg", num);
    }

    /** Wildcard query on the email field. */
    @Test
    public void test_search05() {
        search.searchBywildCard("email", "aa**", num);
    }

    /** Boolean query. */
    @Test
    public void test_search06() {
        search.searchByBoolean(num);
    }

    /** Phrase query. */
    @Test
    public void test_search07() {
        search.searchByPhrase(num);
    }

    /** Fuzzy query. */
    @Test
    public void test_search08() {
        search.searchByFuzzy(num);
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: