Lucene3.6 之 Filter
2015-10-16 09:11
253 查看
1、TermRangeFilter
A Filter that restricts search results to a range of term values in a given field.
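This filter matches the documents looking for terms that fall into the supplied range according to String.compareTo(String), unless a Collator is provided (Lucene 4.x instead documents the comparison as Byte.compareTo(Byte)). It is not intended for numerical ranges; use NumericRangeFilter instead.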
If you construct a large number of range filters with different ranges but on the same field, FieldCacheRangeFilter may have significantly better performance.
示例代码
[java] view
plaincopy
/**
 * Searches the index for the keyword "android" in the "title" and "category" fields,
 * keeps only hits whose "publishTime" term lies in the inclusive range
 * ["2011-09", "2012-06"], and prints the total hit count followed by up to 100 hits.
 *
 * The searcher, reader and directory are released in finally blocks, so they are
 * closed even when query parsing, searching or field parsing throws.
 */
@Test
public void testTermRangeFilter() {
    String path = "D:\\LuceneEx\\day01";
    String keyword = "android";
    try {
        Directory mdDirectory = FSDirectory.open(new File(path));
        try {
            // Use the IK analyzer for tokenizing the query text.
            Analyzer mAnalyzer = new IKAnalyzer();
            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    // Search across several fields at once.
                    String[] fields = { "title", "category" };
                    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_36,
                            fields, mAnalyzer);
                    Query query = parser.parse(keyword);

                    // Keep only records whose publishTime is between 2011-09 and 2012-06
                    // (both bounds inclusive).
                    // NOTE(review): the range is compared as terms, not as dates; if the
                    // indexed values carry a day part (e.g. "2012-06-15") they sort after
                    // "2012-06" and would be excluded - confirm the indexed format.
                    Filter filter = new TermRangeFilter("publishTime", "2011-09", "2012-06", true, true);
                    TopDocs tops = searcher.search(query, filter, 100);
                    System.out.println("totalHits=" + tops.totalHits);

                    ScoreDoc[] docs = tops.scoreDocs;
                    for (int i = 0; i < docs.length; i++) {
                        Document doc = searcher.doc(docs[i].doc);
                        float score = docs[i].score;
                        int id = Integer.parseInt(doc.get("id"));
                        String title = doc.get("title");
                        String author = doc.get("author");
                        String publishTime = doc.get("publishTime");
                        String source = doc.get("source");
                        String category = doc.get("category");
                        float reputation = Float.parseFloat(doc.get("reputation"));
                        System.out.println(id + "\t" + title + "\t" + author + "\t"
                                + publishTime + "\t" + source + "\t" + category + "\t"
                                + reputation + "\t" + score);
                    }
                } finally {
                    // Close the searcher before the reader it wraps.
                    searcher.close();
                }
            } finally {
                // A searcher built from an existing reader does not close that reader.
                reader.close();
            }
        } finally {
            mdDirectory.close();
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    }
}
2、NumericRangeFilter
A Filter that only accepts numeric values within a specified range. To use this, you must first index the numeric values using NumericField (in Lucene 4.x and later: IntField, FloatField, LongField or DoubleField; expert: NumericTokenStream).
示例代码
/**
 * Searches the index for the keyword "android" in the "title" and "category" fields,
 * keeps only hits whose numeric "reputation" value lies in the inclusive range
 * [9.0, 9.8], and prints the total hit count followed by up to 100 hits.
 *
 * The searcher, reader and directory are released in finally blocks, so they are
 * closed even when query parsing, searching or field parsing throws.
 */
@Test
public void testNumericRangeFilter() {
    String path = "D:\\LuceneEx\\day02";
    String keyword = "android";
    try {
        Directory mdDirectory = FSDirectory.open(new File(path));
        try {
            // Use the IK analyzer for tokenizing the query text.
            Analyzer mAnalyzer = new IKAnalyzer();
            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    // Search across several fields at once.
                    String[] fields = { "title", "category" };
                    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_36,
                            fields, mAnalyzer);
                    Query query = parser.parse(keyword);

                    // Keep only records whose reputation is between 9.0 and 9.8
                    // (both bounds inclusive).
                    Filter filter = NumericRangeFilter.newFloatRange("reputation", 9.0f, 9.8f, true, true);
                    TopDocs tops = searcher.search(query, filter, 100);
                    System.out.println("totalHits=" + tops.totalHits);

                    ScoreDoc[] docs = tops.scoreDocs;
                    for (int i = 0; i < docs.length; i++) {
                        Document doc = searcher.doc(docs[i].doc);
                        float score = docs[i].score;
                        int id = Integer.parseInt(doc.get("id"));
                        String title = doc.get("title");
                        String author = doc.get("author");
                        String publishTime = doc.get("publishTime");
                        String source = doc.get("source");
                        String category = doc.get("category");
                        float reputation = Float.parseFloat(doc.get("reputation"));
                        System.out.println(id + "\t" + title + "\t" + author + "\t"
                                + publishTime + "\t" + source + "\t" + category + "\t"
                                + reputation + "\t" + score);
                    }
                } finally {
                    // Close the searcher before the reader it wraps.
                    searcher.close();
                }
            } finally {
                // A searcher built from an existing reader does not close that reader.
                reader.close();
            }
        } finally {
            mdDirectory.close();
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    }
}
A Filter that restricts search results to a range of term values in a given field.
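This filter matches the documents looking for terms that fall into the supplied range according to String.compareTo(String), unless a Collator is provided (Lucene 4.x instead documents the comparison as Byte.compareTo(Byte)). It is not intended for numerical ranges; use NumericRangeFilter instead.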
If you construct a large number of range filters with different ranges but on the same field, FieldCacheRangeFilter may have significantly better performance.
示例代码
[java] view
plaincopy
/**
 * Searches the index for the keyword "android" in the "title" and "category" fields,
 * keeps only hits whose "publishTime" term lies in the inclusive range
 * ["2011-09", "2012-06"], and prints the total hit count followed by up to 100 hits.
 *
 * The searcher, reader and directory are released in finally blocks, so they are
 * closed even when query parsing, searching or field parsing throws.
 */
@Test
public void testTermRangeFilter() {
    String path = "D:\\LuceneEx\\day01";
    String keyword = "android";
    try {
        Directory mdDirectory = FSDirectory.open(new File(path));
        try {
            // Use the IK analyzer for tokenizing the query text.
            Analyzer mAnalyzer = new IKAnalyzer();
            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    // Search across several fields at once.
                    String[] fields = { "title", "category" };
                    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_36,
                            fields, mAnalyzer);
                    Query query = parser.parse(keyword);

                    // Keep only records whose publishTime is between 2011-09 and 2012-06
                    // (both bounds inclusive).
                    // NOTE(review): the range is compared as terms, not as dates; if the
                    // indexed values carry a day part (e.g. "2012-06-15") they sort after
                    // "2012-06" and would be excluded - confirm the indexed format.
                    Filter filter = new TermRangeFilter("publishTime", "2011-09", "2012-06", true, true);
                    TopDocs tops = searcher.search(query, filter, 100);
                    System.out.println("totalHits=" + tops.totalHits);

                    ScoreDoc[] docs = tops.scoreDocs;
                    for (int i = 0; i < docs.length; i++) {
                        Document doc = searcher.doc(docs[i].doc);
                        float score = docs[i].score;
                        int id = Integer.parseInt(doc.get("id"));
                        String title = doc.get("title");
                        String author = doc.get("author");
                        String publishTime = doc.get("publishTime");
                        String source = doc.get("source");
                        String category = doc.get("category");
                        float reputation = Float.parseFloat(doc.get("reputation"));
                        System.out.println(id + "\t" + title + "\t" + author + "\t"
                                + publishTime + "\t" + source + "\t" + category + "\t"
                                + reputation + "\t" + score);
                    }
                } finally {
                    // Close the searcher before the reader it wraps.
                    searcher.close();
                }
            } finally {
                // A searcher built from an existing reader does not close that reader.
                reader.close();
            }
        } finally {
            mdDirectory.close();
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    }
}
2、NumericRangeFilter
A Filter that only accepts numeric values within a specified range. To use this, you must first index the numeric values using NumericField (in Lucene 4.x and later: IntField, FloatField, LongField or DoubleField; expert: NumericTokenStream).
示例代码
/**
 * Searches the index for the keyword "android" in the "title" and "category" fields,
 * keeps only hits whose numeric "reputation" value lies in the inclusive range
 * [9.0, 9.8], and prints the total hit count followed by up to 100 hits.
 *
 * The searcher, reader and directory are released in finally blocks, so they are
 * closed even when query parsing, searching or field parsing throws.
 */
@Test
public void testNumericRangeFilter() {
    String path = "D:\\LuceneEx\\day02";
    String keyword = "android";
    try {
        Directory mdDirectory = FSDirectory.open(new File(path));
        try {
            // Use the IK analyzer for tokenizing the query text.
            Analyzer mAnalyzer = new IKAnalyzer();
            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    // Search across several fields at once.
                    String[] fields = { "title", "category" };
                    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_36,
                            fields, mAnalyzer);
                    Query query = parser.parse(keyword);

                    // Keep only records whose reputation is between 9.0 and 9.8
                    // (both bounds inclusive).
                    Filter filter = NumericRangeFilter.newFloatRange("reputation", 9.0f, 9.8f, true, true);
                    TopDocs tops = searcher.search(query, filter, 100);
                    System.out.println("totalHits=" + tops.totalHits);

                    ScoreDoc[] docs = tops.scoreDocs;
                    for (int i = 0; i < docs.length; i++) {
                        Document doc = searcher.doc(docs[i].doc);
                        float score = docs[i].score;
                        int id = Integer.parseInt(doc.get("id"));
                        String title = doc.get("title");
                        String author = doc.get("author");
                        String publishTime = doc.get("publishTime");
                        String source = doc.get("source");
                        String category = doc.get("category");
                        float reputation = Float.parseFloat(doc.get("reputation"));
                        System.out.println(id + "\t" + title + "\t" + author + "\t"
                                + publishTime + "\t" + source + "\t" + category + "\t"
                                + reputation + "\t" + score);
                    }
                } finally {
                    // Close the searcher before the reader it wraps.
                    searcher.close();
                }
            } finally {
                // A searcher built from an existing reader does not close that reader.
                reader.close();
            }
        } finally {
            mdDirectory.close();
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    }
}
相关文章推荐
- Lucene_demo06_几种搜索
- 深入坐标系 和坐标系相关的四个属
- PyQT信号槽_学习笔记 (Signals/Slots)
- C++ 远程 shell (cmd)
- iOS应用架构谈 网络层设计方案
- 黑马程序员——this,super,interface,final,abstract,static
- ios66详解之时间戳与时间的转换
- 第七周上机实践项目5——排队看病模拟
- SimpleDateFormat的线程安全问题
- 第四周项目3 单链表应用(3)
- nginx搭建https服务器
- 基于 Lucene 的8 个开源搜索引擎
- 什么是垂直搜索引擎
- AutoLayout代码布局使用大全—一种全新的布局思想
- angularJS与bootstrap结合实现动态加载弹出提示内容
- 第七周 项目4-队列数组
- 第六周项目1—建立顺序栈算法库
- 搜索引擎的工作原理
- Lucene_demo09_txt文件索引
- Linux:32/64位程序(应用程序、共享库、内核模块)