您的位置:首页 > 其它

Lucene从入门到熟悉(二)检索

2014-07-20 11:22 239 查看

检索

IndexSearcher :检索操作的核心组件,用于对 IndexWriter 创建的索引执行只读的检索操作,工作模式为接收 Query 对象并返回 TopDocs 对象(其中的 scoreDocs 即 ScoreDoc 数组)。

Term :检索的基本单元,表示检索的字段名称和检索对象的值,如 Term("title", "lucene"),即表示在 title 字段中搜寻关键词 lucene。

Query :表示查询的抽象类,由相应的 Term 来标识。

TermQuery :最基本的查询类型,用于匹配含有指定值字段的文档。

TopDocs:保存查询结果的类。

ScoreDoc(Hits):用来装载搜索结果文档队列指针的数组容器。

样例代码:
package com.lucene.test.T02;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demonstrates the most basic search: an exact-term lookup with {@link TermQuery}.
 *
 * <p>Opens the index stored under {@code d:/temp/luceneindex}, looks up the term
 * {@code beijing} in the {@code address} field and prints one line per hit.
 */
public class TestSearcher {
    /**
     * Runs the term search and prints score, id, name, address and birthday of each hit.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "d:/temp/luceneindex";
        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                // Match documents whose "address" field holds the term "beijing".
                Term term = new Term("address", "beijing");
                TermQuery query = new TermQuery(term);
                // Request the top 10 hits; fewer come back when fewer documents match.
                TopDocs topDocs = searcher.search(query, 10);

                ScoreDoc[] hits = topDocs.scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    // ScoreDoc only carries the document number; load the stored fields.
                    Document doc = searcher.doc(hits[i].doc);
                    System.out.print(hits[i].score + " ");
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Close the searcher even when searching or document loading fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}


输出结果:
1.287682 2 lisi beijing 19860105 

1.287682 4 zhaoliu beijing 19550719 

注意:请使用前一章建立索引的例子来建索引。还要注意,用 Term term = new Term("address", "beijing"); 查询 address 字段时,前一章的 document.add(new Field("address", address[i], Field.Store.YES, Field.Index.NO)); 没有为该字段建立索引,这样是查不到结果的,要改成 Field.Index.ANALYZED。

RangeQuery 查找一定范围的值

package com.lucene.test.T02;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demonstrates a range search with {@link RangeQuery}.
 *
 * <p>Opens the index stored under {@code d:/temp/luceneindex} and looks for documents
 * whose {@code birthday} term lies between {@code 19860105} and {@code 19880101}.
 */
public class TestRangSearch {

    /**
     * Runs the range search and prints score, id, name, address and birthday of each hit.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "d:/temp/luceneindex";
        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                Term beginTerm = new Term("birthday", "19860105");
                Term endTerm = new Term("birthday", "19880101");

                // RangeQuery matches terms between the two bounds; the third argument
                // (true) makes both bounds inclusive.
                RangeQuery query = new RangeQuery(beginTerm, endTerm, true);

                // Only the top 2 hits are requested.
                TopDocs topDocs = searcher.search(query, 2);

                ScoreDoc[] hits = topDocs.scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    // ScoreDoc only carries the document number; load the stored fields.
                    Document doc = searcher.doc(hits[i].doc);
                    System.out.print(hits[i].score + " ");
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Close the searcher even when searching or document loading fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}


输出:

1.1972358 1 zhangsan shanghai 19880101 

1.1972358 2 lisi beijing 19860105 

PrefixQuery 以XX开头的查询

package com.lucene.test.T02;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demonstrates a prefix search with {@link PrefixQuery}.
 *
 * <p>Opens the index stored under {@code d:/temp/luceneindex} and looks for documents
 * whose {@code name} field has a term starting with {@code zh}.
 */
public class TestPrefixQuery {

    /**
     * Runs the prefix search and prints score, id, name, address and birthday of each hit.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "d:/temp/luceneindex";
        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                // The term text is used as the prefix: names beginning with "zh".
                Term term = new Term("name", "zh");
                PrefixQuery query = new PrefixQuery(term);

                // Request the top 10 hits.
                TopDocs topDocs = searcher.search(query, 10);

                ScoreDoc[] hits = topDocs.scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    // ScoreDoc only carries the document number; load the stored fields.
                    Document doc = searcher.doc(hits[i].doc);
                    System.out.print(hits[i].score + " ");
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Close the searcher even when searching or document loading fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}


输出:
1.1972358 1 zhangsan shanghai 19880101 

1.1972358 4 zhaoliu beijing 19550719 

FuzzyQuery 模糊查询
package com.lucene.test.T02;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class TestFuzzyQuery {
public static void main(String[] args) throws IOException {
Analyzer analyzer = new StandardAnalyzer();
String indexDir = "d:/temp/luceneindex";
Directory dir = FSDirectory.getDirectory(indexDir);
IndexSearcher searcher = new IndexSearcher(dir);

ScoreDoc[] hits = null;
Term term=new Term("name","zhangsan");
FuzzyQuery query=new FuzzyQuery(term);

TopDocs topDocs=searcher.search(query, 10);

hits=topDocs.scoreDocs;
for(int i=0;i<hits.length;i++){
Document doc=searcher.doc(hits[i].doc);
System.out.print(hits[i].score+" ");
System.out.print(doc.ge
bed2
t("id")+" ");
System.out.print(doc.get("name")+" ");
System.out.print(doc.get("address")+" ");
System.out.println(doc.get("birthday")+" ");
}
searcher.close();
dir.close();
}
}


测试时将前一章建立索引的代码改为 String[] names = { "zhangsan", "zhangsun", "zhangson", "zhaoliu" }; 以便于测试,
模糊查询会将 zhangsan、zhangsun、zhangson 都查出来。

输出结果:
1.1614895 1 zhangsan shanghai 19880101 

0.8711171 2 zhangsun beijing 19860105 

0.8711171 3 zhangson guangzhou 19760205 

BooleanQuery 可以将多个查询条件组合
package com.lucene.test.T02;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demonstrates combining several queries with {@link BooleanQuery}.
 *
 * <p>Opens the index stored under {@code d:/temp/luceneindex} and combines a
 * {@link WildcardQuery} on {@code name} with a {@link TermQuery} on {@code address}
 * that is used as an exclusion.
 */
public class TestBooleanQuery {

    /**
     * Runs the combined search and prints id, name, address and birthday of each hit.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "d:/temp/luceneindex";
        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                // Wildcard pattern: '*' matches any run of characters, '?' exactly one.
                Term nterm = new Term("name", "*g??");
                WildcardQuery wildcardQuery = new WildcardQuery(nterm);
                Term aterm = new Term("address", "tianjing");
                TermQuery termQuery = new TermQuery(aterm);

                // BooleanQuery combines several query clauses into one query.
                BooleanQuery query = new BooleanQuery();
                // SHOULD: the clause is optional, a document may match it.
                query.add(wildcardQuery, BooleanClause.Occur.SHOULD);
                // MUST_NOT: documents matching this clause are excluded from the result.
                query.add(termQuery, BooleanClause.Occur.MUST_NOT);
                TopDocs topDocs = searcher.search(query, 10);

                ScoreDoc[] hits = topDocs.scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    // ScoreDoc only carries the document number; load the stored fields.
                    Document doc = searcher.doc(hits[i].doc);
                    // System.out.print(hits[i].score + " ");
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Close the searcher even when searching or document loading fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  lucene Cloud