lucene QueryParser用法示例
2012-10-09 17:02
225 查看
Java代码
package demo.first;

import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.LockObtainFailedException;
publicclassTestQueryParser{
publicstaticvoidmain(String[]args)throwsCorruptIndexException,IOException{
Stringpath="D://workspace//fwk//lucenedemo//firstLuceneIndex";
TestQueryParsertqp=newTestQueryParser();
tqp.createIndex(path);
Searchersearch=tqp.getSearcher(path);
System.out.println("#_2");
tqp.testTime(search,tqp.getQueryParser2());
System.out.println("#_1");
tqp.testTime(search,tqp.getQueryParser1());
System.out.println("#_3");
tqp.testTime(search,tqp.getQueryParser3());
System.out.println("#_4");
tqp.testTime(search,tqp.getQueryParser4());
System.out.println("#_5");
tqp.testTime(search,tqp.getQueryParser5());
System.out.println("#_6");
tqp.testTime(search,tqp.getQueryParser6());
System.out.println("#_7");
tqp.testTime(search,tqp.getQueryParser7());
}
publicvoidtestTime(Searchersearch,Queryquery)throwsIOException{
Datestart=newDate();
Hitshits=search.search(query);
for(inti=0;i<hits.length();i++){
System.out.println(hits.id(i));
System.out.println(hits.doc(i));
System.out.println(hits.score(i));
}
System.out.println("本次搜索用时:"+((newDate()).getTime()-start.getTime())+"毫秒");
}
publicSearchergetSearcher(Stringpath)throwsCorruptIndexException,IOException{
returnnewIndexSearcher(path);
}
publicQuerygetQueryParser1(){
//默认搜索字段
QueryParserqueryParser=newQueryParser("content",newStandardAnalyzer());
try{
returnqueryParser.parse("搜索-擎");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicQuerygetQueryParser2(){
QueryParserqueryParser=newQueryParser("content",newStandardAnalyzer());
try{
returnqueryParser.parse("欢迎");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicQuerygetQueryParser3(){
QueryParserqueryParser=newQueryParser("content",newStandardAnalyzer());
try{
returnqueryParser.parse("搜索and擎");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicQuerygetQueryParser4(){
QueryParserqueryParser=newQueryParser("content",newStandardAnalyzer());
try{
//content字段搜索索引title字段搜寻你好
returnqueryParser.parse("索引title:你好");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicQuerygetQueryParser5(){
QueryParserqueryParser=newQueryParser("content",newStandardAnalyzer());
//允许使用正则表达式方式,则可模糊查询
queryParser.setAllowLeadingWildcard(true);
try{
returnqueryParser.parse("*索*");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicQuerygetQueryParser6(){
QueryParserqueryParser=newQueryParser("testCapital",newStandardAnalyzer());
try{
returnqueryParser.parse("hellOwangzi");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicQuerygetQueryParser7(){
QueryParserqueryParser=newQueryParser("testAndOr",newStandardAnalyzer());
try{
//returnqueryParser.parse("and");
returnqueryParser.parse("test");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicvoidcreateIndex(Stringpath){
try{
IndexWriterwriter=newIndexWriter(path,newStandardAnalyzer(),true);
DocumentdocA=newDocument();
//相当于数据库中列的概念,因此第一个参数是列名,第二个参数是列的值,最后两个参数是enum类型的(JDK1.5),对创建的索引的设置
//Field.Store是否覆盖原来的索引文件,而不是重新建一个
FieldfieldA=newField("content","搜索引擎",Field.Store.YES,Field.Index.TOKENIZED);
//我们把列(fieldA)加到某一行(docA)中
docA.add(fieldA);
docA.add(newField("title","你好中国",Field.Store.YES,Field.Index.TOKENIZED));
docA.add(newField("content","欢迎你llying",Field.Store.YES,Field.Index.TOKENIZED));
docA.add(newField("lastModifyTime","2008-9-17",Field.Store.YES,Field.Index.TOKENIZED));
docA.add(newField("testCapital","HelloWangzi",Field.Store.YES,Field.Index.TOKENIZED));
docA.add(newField("testAndOr","testand",Field.Store.YES,Field.Index.TOKENIZED));
DocumentdocB=newDocument();
//相当于数据库中列的概念,因此第一个参数是列名,第二个参数是列的值,最后两个参数是enum类型的(JDK1.5),对创建的索引的设置
FieldfieldB=newField("content","创建索引",Field.Store.YES,Field.Index.TOKENIZED);
//我们把列(fieldA)加到某一行(docA)中
docB.add(fieldB);
docB.add(newField("title","你好世界",Field.Store.YES,Field.Index.TOKENIZED));
docB.add(newField("content","欢迎加入jee高级开发群46176507",Field.Store.YES,Field.Index.TOKENIZED));
docB.add(newField("lastModifyTime","2008-9-6",Field.Store.YES,Field.Index.TOKENIZED));
docB.add(newField("testCapital","hellowangZi",Field.Store.YES,Field.Index.TOKENIZED));
docB.add(newField("testAndOr","testor",Field.Store.YES,Field.Index.TOKENIZED));
writer.addDocument(docA);
writer.addDocument(docB);
//如果对海量数据进行创建索引的时候,需要对索引进行优化,以便提高速度
writer.optimize();
//跟数据库类似,打开一个连接,使用完后,要关闭它
writer.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(LockObtainFailedExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
}
示例2:
/**
 * Searches the goods index for a keyword and renders the top matches as HTML list items.
 *
 * <p>The keyword is first passed through {@code getKeyword} (defined elsewhere), then
 * wrapped in leading and trailing {@code *} wildcards and parsed against the
 * {@code name1}, {@code nameeng1} and {@code country} fields. At most 8 hits are
 * collected. Any exception is printed and whatever markup was built so far
 * (possibly none) is returned.
 *
 * @param keyword search keyword; space-separated words are OR-ed together
 * @return concatenated {@code <li id='...'>...</li>} elements, one per hit; empty when
 *         nothing matched
 */
public static String findGoodsByKeyword(String keyword)
{
    final int maxHits = 8;
    StringBuilder sb = new StringBuilder();
    IndexSearcher searcher = null;
    Directory dir = null;
    StandardAnalyzer analyzer = null;
    try
    {
        keyword = getKeyword(keyword);
        BooleanQuery booleanQuery = new BooleanQuery();
        dir = FSDirectory.open(new File(ParamInit.getInitParameter("lucene_path")));
        searcher = new IndexSearcher(dir, true);
        analyzer = new StandardAnalyzer(Version.LUCENE_30);
        // Use the same Version as the analyzer so parser and analyzer agree and the
        // behavior does not shift when the Lucene jar is upgraded (as LUCENE_CURRENT would).
        MultiFieldQueryParser multiParser1 = new MultiFieldQueryParser(Version.LUCENE_30,
                new String[]{"name1", "nameeng1", "country"}, analyzer);
        // OR: space-separated words in the keyword are alternatives.
        multiParser1.setDefaultOperator(QueryParser.Operator.OR);
        // Permit "*" as the first character of a term (wildcard matching, not regex).
        multiParser1.setAllowLeadingWildcard(true);
        Query multi_parser1 = multiParser1.parse("*" + keyword + "*");
        System.out.println("Query转化后的Term内容为:" + multi_parser1.toString());
        // SHOULD: "or" semantics - the result is the union of all clauses.
        booleanQuery.add(multi_parser1, BooleanClause.Occur.SHOULD);
        // Sort by "status" in reverse order, then by relevance score.
        Sort sort = new Sort(new SortField[]{
                new SortField("status", SortField.STRING, true),
                new SortField("name1", SortField.SCORE, false),
                new SortField("nameeng1", SortField.SCORE, false),
                new SortField("country", SortField.SCORE, false)});
        // The collector keeps at most maxHits documents, so no extra counting is needed below.
        TopFieldCollector topCollector = TopFieldCollector.create(sort, maxHits, true, true, true, false);
        searcher.search(booleanQuery, null, topCollector);
        ScoreDoc[] docs = topCollector.topDocs(0).scoreDocs;
        for (ScoreDoc scdoc : docs)
        {
            Document doc = searcher.doc(scdoc.doc);
            // NOTE(review): stored values are written into the markup unescaped - confirm
            // the "id" and "name" fields can never contain HTML metacharacters.
            sb.append("<li id='").append(doc.get("id")).append("'>").append(doc.get("name")).append("</li>");
        }
    }
    catch (Exception e)
    {
        e.printStackTrace();
    }
    finally
    {
        // Close each resource on its own so a failing close() cannot leak the others.
        try
        {
            if (null != searcher) searcher.close();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        try
        {
            if (null != dir) dir.close();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        if (null != analyzer) analyzer.close();
    }
    return sb.toString();
}
package demo.first;

import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.LockObtainFailedException;
publicclassTestQueryParser{
publicstaticvoidmain(String[]args)throwsCorruptIndexException,IOException{
Stringpath="D://workspace//fwk//lucenedemo//firstLuceneIndex";
TestQueryParsertqp=newTestQueryParser();
tqp.createIndex(path);
Searchersearch=tqp.getSearcher(path);
System.out.println("#_2");
tqp.testTime(search,tqp.getQueryParser2());
System.out.println("#_1");
tqp.testTime(search,tqp.getQueryParser1());
System.out.println("#_3");
tqp.testTime(search,tqp.getQueryParser3());
System.out.println("#_4");
tqp.testTime(search,tqp.getQueryParser4());
System.out.println("#_5");
tqp.testTime(search,tqp.getQueryParser5());
System.out.println("#_6");
tqp.testTime(search,tqp.getQueryParser6());
System.out.println("#_7");
tqp.testTime(search,tqp.getQueryParser7());
}
publicvoidtestTime(Searchersearch,Queryquery)throwsIOException{
Datestart=newDate();
Hitshits=search.search(query);
for(inti=0;i<hits.length();i++){
System.out.println(hits.id(i));
System.out.println(hits.doc(i));
System.out.println(hits.score(i));
}
System.out.println("本次搜索用时:"+((newDate()).getTime()-start.getTime())+"毫秒");
}
publicSearchergetSearcher(Stringpath)throwsCorruptIndexException,IOException{
returnnewIndexSearcher(path);
}
publicQuerygetQueryParser1(){
//默认搜索字段
QueryParserqueryParser=newQueryParser("content",newStandardAnalyzer());
try{
returnqueryParser.parse("搜索-擎");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicQuerygetQueryParser2(){
QueryParserqueryParser=newQueryParser("content",newStandardAnalyzer());
try{
returnqueryParser.parse("欢迎");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicQuerygetQueryParser3(){
QueryParserqueryParser=newQueryParser("content",newStandardAnalyzer());
try{
returnqueryParser.parse("搜索and擎");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicQuerygetQueryParser4(){
QueryParserqueryParser=newQueryParser("content",newStandardAnalyzer());
try{
//content字段搜索索引title字段搜寻你好
returnqueryParser.parse("索引title:你好");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicQuerygetQueryParser5(){
QueryParserqueryParser=newQueryParser("content",newStandardAnalyzer());
//允许使用正则表达式方式,则可模糊查询
queryParser.setAllowLeadingWildcard(true);
try{
returnqueryParser.parse("*索*");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicQuerygetQueryParser6(){
QueryParserqueryParser=newQueryParser("testCapital",newStandardAnalyzer());
try{
returnqueryParser.parse("hellOwangzi");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicQuerygetQueryParser7(){
QueryParserqueryParser=newQueryParser("testAndOr",newStandardAnalyzer());
try{
//returnqueryParser.parse("and");
returnqueryParser.parse("test");
}catch(Exceptione){
e.printStackTrace();
}
returnnull;
}
publicvoidcreateIndex(Stringpath){
try{
IndexWriterwriter=newIndexWriter(path,newStandardAnalyzer(),true);
DocumentdocA=newDocument();
//相当于数据库中列的概念,因此第一个参数是列名,第二个参数是列的值,最后两个参数是enum类型的(JDK1.5),对创建的索引的设置
//Field.Store是否覆盖原来的索引文件,而不是重新建一个
FieldfieldA=newField("content","搜索引擎",Field.Store.YES,Field.Index.TOKENIZED);
//我们把列(fieldA)加到某一行(docA)中
docA.add(fieldA);
docA.add(newField("title","你好中国",Field.Store.YES,Field.Index.TOKENIZED));
docA.add(newField("content","欢迎你llying",Field.Store.YES,Field.Index.TOKENIZED));
docA.add(newField("lastModifyTime","2008-9-17",Field.Store.YES,Field.Index.TOKENIZED));
docA.add(newField("testCapital","HelloWangzi",Field.Store.YES,Field.Index.TOKENIZED));
docA.add(newField("testAndOr","testand",Field.Store.YES,Field.Index.TOKENIZED));
DocumentdocB=newDocument();
//相当于数据库中列的概念,因此第一个参数是列名,第二个参数是列的值,最后两个参数是enum类型的(JDK1.5),对创建的索引的设置
FieldfieldB=newField("content","创建索引",Field.Store.YES,Field.Index.TOKENIZED);
//我们把列(fieldA)加到某一行(docA)中
docB.add(fieldB);
docB.add(newField("title","你好世界",Field.Store.YES,Field.Index.TOKENIZED));
docB.add(newField("content","欢迎加入jee高级开发群46176507",Field.Store.YES,Field.Index.TOKENIZED));
docB.add(newField("lastModifyTime","2008-9-6",Field.Store.YES,Field.Index.TOKENIZED));
docB.add(newField("testCapital","hellowangZi",Field.Store.YES,Field.Index.TOKENIZED));
docB.add(newField("testAndOr","testor",Field.Store.YES,Field.Index.TOKENIZED));
writer.addDocument(docA);
writer.addDocument(docB);
//如果对海量数据进行创建索引的时候,需要对索引进行优化,以便提高速度
writer.optimize();
//跟数据库类似,打开一个连接,使用完后,要关闭它
writer.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(LockObtainFailedExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
}
示例2:
/**
 * Searches the goods index for a keyword and renders the top matches as HTML list items.
 *
 * <p>The keyword is first passed through {@code getKeyword} (defined elsewhere), then
 * wrapped in leading and trailing {@code *} wildcards and parsed against the
 * {@code name1}, {@code nameeng1} and {@code country} fields. At most 8 hits are
 * collected. Any exception is printed and whatever markup was built so far
 * (possibly none) is returned.
 *
 * @param keyword search keyword; space-separated words are OR-ed together
 * @return concatenated {@code <li id='...'>...</li>} elements, one per hit; empty when
 *         nothing matched
 */
public static String findGoodsByKeyword(String keyword)
{
    final int maxHits = 8;
    StringBuilder sb = new StringBuilder();
    IndexSearcher searcher = null;
    Directory dir = null;
    StandardAnalyzer analyzer = null;
    try
    {
        keyword = getKeyword(keyword);
        BooleanQuery booleanQuery = new BooleanQuery();
        dir = FSDirectory.open(new File(ParamInit.getInitParameter("lucene_path")));
        searcher = new IndexSearcher(dir, true);
        analyzer = new StandardAnalyzer(Version.LUCENE_30);
        // Use the same Version as the analyzer so parser and analyzer agree and the
        // behavior does not shift when the Lucene jar is upgraded (as LUCENE_CURRENT would).
        MultiFieldQueryParser multiParser1 = new MultiFieldQueryParser(Version.LUCENE_30,
                new String[]{"name1", "nameeng1", "country"}, analyzer);
        // OR: space-separated words in the keyword are alternatives.
        multiParser1.setDefaultOperator(QueryParser.Operator.OR);
        // Permit "*" as the first character of a term (wildcard matching, not regex).
        multiParser1.setAllowLeadingWildcard(true);
        Query multi_parser1 = multiParser1.parse("*" + keyword + "*");
        System.out.println("Query转化后的Term内容为:" + multi_parser1.toString());
        // SHOULD: "or" semantics - the result is the union of all clauses.
        booleanQuery.add(multi_parser1, BooleanClause.Occur.SHOULD);
        // Sort by "status" in reverse order, then by relevance score.
        Sort sort = new Sort(new SortField[]{
                new SortField("status", SortField.STRING, true),
                new SortField("name1", SortField.SCORE, false),
                new SortField("nameeng1", SortField.SCORE, false),
                new SortField("country", SortField.SCORE, false)});
        // The collector keeps at most maxHits documents, so no extra counting is needed below.
        TopFieldCollector topCollector = TopFieldCollector.create(sort, maxHits, true, true, true, false);
        searcher.search(booleanQuery, null, topCollector);
        ScoreDoc[] docs = topCollector.topDocs(0).scoreDocs;
        for (ScoreDoc scdoc : docs)
        {
            Document doc = searcher.doc(scdoc.doc);
            // NOTE(review): stored values are written into the markup unescaped - confirm
            // the "id" and "name" fields can never contain HTML metacharacters.
            sb.append("<li id='").append(doc.get("id")).append("'>").append(doc.get("name")).append("</li>");
        }
    }
    catch (Exception e)
    {
        e.printStackTrace();
    }
    finally
    {
        // Close each resource on its own so a failing close() cannot leak the others.
        try
        {
            if (null != searcher) searcher.close();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        try
        {
            if (null != dir) dir.close();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        if (null != analyzer) analyzer.close();
    }
    return sb.toString();
}
相关文章推荐
- org.apache.lucene.queryParser.ParseException: Encountered "<EOF>" at line 1, column 0.
- Lucene索引库查询;使用MatchAllDocsQuery查询索引目录中的所有文档;及代码示例
- Lucene Query Parser Syntax
- lucene查询 之 TermQuery,通过项查询,及代码示例;TermQuery不使用分析器所以建议匹配不分词的Field域查询
- lucene4下用MultiFieldQueryParser同时搜索多个field时,结果的score浅析
- org.apache.lucene.queryParser.ParseException: Encountered "<EOF>" at line 1, column 0.
- org.apache.lucene.queryParser.ParseException: Encountered "<EOF>" at line 1, column 0.
- lucene学习之queryParser
- lucene---QueryParser用法示例
- [ lucene FAQ ] Lucene QueryParser Exception : Encountered <EOF> at line *
- MySQL5.7.20源码安装以及pt-query-digest用法示例
- lucene3.0中使用MultiFieldQueryParser多字段查找
- Lucene2.4之PhraseQuery的用法
- org.apache.lucene.queryparser.classic.ParseException: Encountered "<EOF>" at line 1, column 0.
- JS Cookies用法示例:记事贴
- (转)lucene.net 的查询方式query,条件判断
- Lucene 2.9 NumericRangeQuery 性能提升大
- perl 函数, 参数, @_, $_, $_[0], shift 的用法示例
- AjaxPanel自定义控件实现页面无刷新数据交互(做了个示例程序, 效果确实比较Cool, 用法非常简单! )(示例代码下载)