您的位置:首页 > 其它

Lucene 入门和简单封装

2016-03-06 09:17 555 查看
package com.whf.demo;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Singleton manager for the core Lucene objects.
 *
 * Glossary: Document (a set of Fields), Field (one attribute),
 * Analyzer (tokenization), IndexWriter (index creation),
 * Directory (index storage, e.g. FSDirectory or RAMDirectory),
 * Term (field name -&gt; keyword to search for).
 *
 * @author whf
 */
public class LuceneManager {

    private volatile static LuceneManager singleton = null;
    // Process-wide, lazily created Lucene handles.
    private volatile static IndexWriter writer = null;
    private volatile static IndexReader reader = null;
    private volatile static IndexSearcher searcher = null;

    // Guards lazy creation of the shared IndexWriter.
    private final Lock writerLock = new ReentrantLock();
    private final static Object obj = new Object();
    private static Version version = Version.LUCENE_CURRENT;

    /** Private constructor; obtain the instance via {@link #getInstance()}. */
    private LuceneManager() {
    }

    /**
     * Returns the LuceneManager singleton (double-checked locking on obj).
     *
     * @return the shared LuceneManager instance
     */
    public static LuceneManager getInstance() {
        if (null == singleton) {
            synchronized (obj) {
                if (null == singleton) {
                    singleton = new LuceneManager();
                }
            }
        }
        return singleton;
    }

    /**
     * Returns the shared IndexWriter, creating it on first use
     * (creation guarded by writerLock).
     *
     * @param dir    index directory, must not be null
     * @param config writer configuration, must not be null
     * @return the shared IndexWriter, or null if creation failed
     */
    public IndexWriter getIndexWriter(Directory dir, IndexWriterConfig config) {
        if (dir == null)
            throw new IllegalArgumentException("Directory can not be null.");
        if (config == null)
            throw new IllegalArgumentException(
                    "IndexWriterConfig can not be null.");
        // FIX: acquire the lock BEFORE the try block; in the original a failed
        // lock() would still run unlock() in finally on a lock we never held,
        // masking the real error with IllegalMonitorStateException.
        writerLock.lock();
        try {
            if (writer == null) {
                if (IndexWriter.isLocked(dir)) {
                    throw new LockObtainFailedException(
                            "Directory of index had been locked.");
                }
                writer = new IndexWriter(dir, config);
            }
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            writerLock.unlock();
        }
        return writer;
    }

    /**
     * Returns the shared IndexReader for the given directory.
     *
     * @param dir             index directory, must not be null
     * @param enableNRTReader whether to reopen as a near-real-time reader so
     *                        recently added/deleted documents become visible
     * @return the shared IndexReader, or null if opening failed
     */
    public IndexReader getIndexReader(Directory dir, boolean enableNRTReader) {
        if (dir == null)
            throw new IllegalArgumentException("Directory can not be null.");
        try {
            if (reader == null) {
                reader = DirectoryReader.open(dir);
            } else {
                if (enableNRTReader && reader instanceof DirectoryReader) {
                    // Near-real-time reader: lets us see index changes immediately.
                    // FIX: openIfChanged returns null when the index has NOT
                    // changed; the original assigned that null straight into
                    // the cached reader, losing a perfectly good reader.
                    DirectoryReader newReader = DirectoryReader
                            .openIfChanged((DirectoryReader) reader);
                    if (newReader != null) {
                        reader = newReader;
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return reader;
    }

    /**
     * Returns the shared IndexReader (near-real-time reopening disabled).
     *
     * @param dir index directory, must not be null
     * @return the shared IndexReader, or null if opening failed
     */
    public IndexReader getIndexReader(Directory dir) {
        return getIndexReader(dir, false);
    }

    /**
     * Returns the shared IndexSearcher.
     *
     * @param reader   IndexReader instance, must not be null
     * @param executor pass an ExecutorService to enable multi-threaded
     *                 segment search, or null for single-threaded search
     * @return the shared IndexSearcher
     */
    public IndexSearcher getIndexSearcher(IndexReader reader,
            ExecutorService executor) {
        if (reader == null)
            throw new IllegalArgumentException(
                    "The indexReader can not be null.");
        if (searcher == null) {
            // FIX: the original ignored the executor parameter entirely even
            // though the javadoc promised multi-threaded search support.
            searcher = (executor == null) ? new IndexSearcher(reader)
                    : new IndexSearcher(reader, executor);
        }
        return searcher;
    }

    /**
     * Returns the shared IndexSearcher (single-threaded search).
     *
     * @param reader IndexReader instance, must not be null
     * @return the shared IndexSearcher
     */
    public IndexSearcher getIndexSearcher(IndexReader reader) {
        return getIndexSearcher(reader, null);
    }

    /**
     * Creates a single-field QueryParser.
     *
     * Usage: QueryParser parser = new QueryParser("fieldName", analyzer);
     * Query q = parser.parse("keyword");
     * parser.setDefaultOperator(QueryParser.Operator.AND) requires all
     * keywords to match; Operator.OR matches any of them.
     *
     * @param field    field name to search
     * @param analyzer analyzer used to tokenize the query text
     * @return a new QueryParser
     */
    public static QueryParser createQueryParser(String field, Analyzer analyzer) {
        return new QueryParser(field, analyzer);
    }

    /**
     * Creates a QueryParser that searches several fields at once.
     *
     * @param fields   field names to search
     * @param analyzer analyzer used to tokenize the query text
     * @return a new MultiFieldQueryParser
     */
    public static QueryParser createMultiFieldQueryParser(String[] fields,
            Analyzer analyzer) {
        return new MultiFieldQueryParser(fields, analyzer);
    }

    /**
     * Closes an IndexWriter, releasing its resources and write lock.
     *
     * @param writer the writer to close; null is tolerated
     */
    public static void closeIndexWriter(IndexWriter writer) {
        if (writer != null) {
            try {
                writer.close();
                // FIX: the original nulled the method PARAMETER, which is a
                // no-op; clear the cached singleton when it is the writer we
                // just closed, so getIndexWriter() never hands out a closed one.
                if (writer == LuceneManager.writer) {
                    LuceneManager.writer = null;
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Closes an IndexReader.
     *
     * @param reader the reader to close; null is tolerated
     */
    public static void closeIndexReader(IndexReader reader) {
        if (reader != null) {
            try {
                reader.close();
                // FIX: same parameter-shadowing no-op as closeIndexWriter;
                // reset the cached singleton if it is the one just closed.
                if (reader == LuceneManager.reader) {
                    LuceneManager.reader = null;
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Closes both an IndexReader and an IndexWriter.
     *
     * @param reader the reader to close; null is tolerated
     * @param writer the writer to close; null is tolerated
     */
    public static void closeAll(IndexReader reader, IndexWriter writer) {
        closeIndexReader(reader);
        closeIndexWriter(writer);
    }
}


package com.whf.demo;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutorService;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.whf.pojo.HighlighterParam;
import com.whf.pojo.Page;

public class LuceneUtil {

private static final LuceneManager manager = LuceneManager.getInstance();
private static Analyzer analyzer = new IKAnalyzer(true);
private static Version version = Version.LUCENE_CURRENT;
private static FSDirectory fsDirectory = null;
private static RAMDirectory ramDirectory = null;

/**
* 打开索引目录
*
* @param luceneDir
* @return
* @throws IOException
*/
public static FSDirectory openFSDirectory(String luceneDir) {
if (fsDirectory == null)
try {
File dir = new File(luceneDir);
if (!dir.exists())
dir.mkdirs();
fsDirectory = FSDirectory.open(Paths.get(luceneDir));
// 注意:isLocked方法内部会试图去获取Lock,
// 如果获取到Lock,会关闭它,否则return false表示索引目录没有被锁.
// 这也就是为什么unlock方法被从IndexWriter类中移除的原因
IndexWriter.isLocked(fsDirectory);
} catch (IOException e) {
e.printStackTrace();
}
return fsDirectory;
}

/**
* 打开内存目录
*
* @param luceneDir
* @return
* @throws IOException
*/
public static RAMDirectory openRAMDirectory() {
if (ramDirectory == null)
return new RAMDirectory();
else
return ramDirectory;
}

/**
* 关闭索引目录并销毁
*
* @param directory
* @throws IOException
*/
public static void closeDirectory(Directory directory) {
if (directory != null) {
try {
directory.close();
} catch (IOException e) {
e.printStackTrace();
}
directory = null;
}
}

/**
* 关闭IndexReader
*
* @param reader
*/
public static void closeIndexReader(IndexReader reader) {
if (reader != null) {
try {
reader.close();
reader = null;
} catch (IOException e) {
e.printStackTrace();
}
}
}

/**
* 关闭IndexWriter
*
* @param writer
*/
public static void closeIndexWriter(IndexWriter writer) {
if (writer != null) {
try {
writer.close();
writer = null;
} catch (IOException e) {
e.printStackTrace();
}
}
}

/**
* 关闭IndexReader和IndexWriter
*
* @param reader
* @param writer
*/
public static void closeAll(IndexReader reader, IndexWriter writer) {
closeIndexReader(reader);
closeIndexWriter(writer);
}

/**
* 获取IndexWriter
*
* @param dir
* @param config
* @return
*/
public static IndexWriter getIndexWrtier(Directory dir,
IndexWriterConfig config) {
return manager.getIndexWriter(dir, config);
}

/**
* 获取IndexWriter
*
* @param dir
* @param config
* @return
*/
public static IndexWriter getIndexWrtier(String directoryPath,
IndexWriterConfig config) {
FSDirectory directory = openFSDirectory(directoryPath);
return manager.getIndexWriter(directory, config);
}

/**
* 获取IndexReader
*
* @param dir
* @param enableNRTReader
*            是否开启NRTReader
* @return
*/
public static IndexReader getIndexReader(Directory dir,
boolean enableNRTReader) {
return manager.getIndexReader(dir, enableNRTReader);
}

/**
* 获取IndexReader(默认不启用NRTReader)
*
* @param dir
* @return
*/
public static IndexReader getIndexReader(Directory dir) {
return manager.getIndexReader(dir);
}

/**
* 获取IndexSearcher
*
* @param reader
*            IndexReader对象
* @param executor
*            如果你需要开启多线程查询,请提供ExecutorService对象参数
* @return
*/
public static IndexSearcher getIndexSearcher(IndexReader reader,
ExecutorService executor) {
return manager.getIndexSearcher(reader, executor);
}

/**
* 获取IndexSearcher(不支持多线程查询)
*
* @param reader
*            IndexReader对象
* @return
*/
public static IndexSearcher getIndexSearcher(IndexReader reader) {
return manager.getIndexSearcher(reader);
}

/**
* 删除索引[注意:请自己关闭IndexWriter对象]
*
* @param writer
* @param field
* @param value
*/
public static void deleteIndex(IndexWriter writer, String field,
String keyword) {
try {
writer.deleteDocuments(new Term[] { new Term(field, keyword) });
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 删除索引[注意:请自己关闭IndexWriter对象]
*
* @param writer
* @param term
*/
public static void deleteIndexs(IndexWriter writer, Term[] terms) {
try {
writer.deleteDocuments(terms);
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 删除索引[注意:请自己关闭IndexWriter对象]
*
* @param writer
* @param field
* @param value
*/
public static void deleteIndex(IndexWriter writer, Term term) {
try {
writer.deleteDocuments(new Term[] { term });
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 批量删除索引[注意:请自己关闭IndexWriter对象]
*
* @param writer
* @param querys
*/
public static void deleteIndexs(IndexWriter writer, Query[] querys) {
try {
writer.deleteDocuments(querys);
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 删除索引[注意:请自己关闭IndexWriter对象]
*
* @param writer
* @param query
*/
public static void deleteIndex(IndexWriter writer, Query query) {
try {
writer.deleteDocuments(query);
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 删除所有索引文档
*
* @param writer
*/
public static void deleteAllIndex(IndexWriter writer) {
try {
writer.deleteAll();
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* Term term = new Term("id","1234567");
* 先去索引文件里查找id为1234567的Doc,如果有就更新它(如果有多条,最后更新后只有一条)。如果没有就新增.
* 数据库更新的时候,我们可以只针对某个列来更新,而lucene只能针对一行数据更新。
*
* @param writer
* @param term
* @param document
*/
public static void updateIndex(IndexWriter writer, Term term,
Document document) {
try {
writer.updateDocument(term, document);
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 添加索引文档
*
* @param writer
* @param doc
*/
public static void addIndex(IndexWriter writer, Document document) {
updateIndex(writer, null, document);
// writer.addDocument(document);
}

/**
* 批量添加索引文档
*
* @param writer
* @param doc
*/
public static void addIndex(IndexWriter writer, List<Document> documents) {
try {
writer.addDocuments(documents);
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 将内存的数据写至外存
*
* @param writer
* @param ramDirectory
* @param fsramDirectory
* @param analyzer
*/
public static void OptimizeRAMToFSDirectory(IndexWriter writer,
Directory fsDirectory, Analyzer analyzer) {
try {
writer.addIndexes(new Directory[] { fsDirectory });
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 索引文档查询
*
* @param searcher
* @param query
* @param sort
* @return
*/
public static List<Document> query(IndexSearcher searcher, Query query,
Integer rows, Sort sort, Filter filter) {
TopDocs topDocs = null;
List<Document> docList = null;
try {
if (sort != null) {
if (filter != null) {
topDocs = searcher.search(query, filter,
(rows == null ? Integer.MAX_VALUE : rows), sort);
} else {
topDocs = searcher.search(query,
(rows == null ? Integer.MAX_VALUE : rows), sort);
}
} else {
if (filter != null) {
topDocs = searcher.search(query, filter,
(rows == null ? Integer.MAX_VALUE : rows));
} else {
topDocs = searcher.search(query,
(rows == null ? Integer.MAX_VALUE : rows));
}
}
ScoreDoc[] scores = topDocs.scoreDocs;
int length = scores.length;
if (length <= 0) {
return Collections.emptyList();
}
docList = new ArrayList<Document>();
for (int i = 0; i < length; i++) {
System.out.println(scores[i].score + " " + scores[i].doc);
Document doc = searcher.doc(scores[i].doc);
docList.add(doc);
}
} catch (IOException e) {
e.printStackTrace();
}
return docList;
}

/**
* 返回索引文档的总数[注意:请自己手动关闭IndexReader]
*
* @param reader
* @return
*/
public static int getIndexTotalCount(IndexReader reader) {
return reader.numDocs();
}

/**
* 返回索引文档中最大文档ID[注意:请自己手动关闭IndexReader]
*
* @param reader
* @return
*/
public static int getMaxDocId(IndexReader reader) {
return reader.maxDoc();
}

/**
* 返回已经删除尚未提交的文档总数[注意:请自己手动关闭IndexReader]
*
* @param reader
* @return
*/
public static int getDeletedDocNum(IndexReader reader) {
return getMaxDocId(reader) - getIndexTotalCount(reader);
}

/**
* 根据docId查询索引文档
*
* @param reader
*            IndexReader对象
* @param docID
*            documentId
* @param fieldsToLoad
*            需要返回的field
* @return
*/
public static Document findDocumentByDocId(IndexReader reader, int docID,
Set<String> fieldsToLoad) {
try {
return reader.document(docID, fieldsToLoad);
} catch (IOException e) {
return null;
}
}

/**
* 根据docId查询索引文档
*
* @param reader
*            IndexReader对象
* @param docID
*            documentId
* @return
*/
public static Document findDocumentByDocId(IndexReader reader, int docID) {
return findDocumentByDocId(reader, docID, null);
}

/**
* @Title: createHighlighter
* @Description: 创建高亮器
* @param query
*            索引查询对象
* @param prefix
*            高亮前缀字符串
* @param stuffix
*            高亮后缀字符串
* @param fragmenterLength
*            摘要最大长度
* @return
*/
public static Highlighter createHighlighter(Query query, String prefix,
String stuffix, int fragmenterLength) {
Formatter formatter = new SimpleHTMLFormatter((prefix == null || prefix
.trim().length() == 0) ? "<font color=\"red\">" : prefix,
(stuffix == null || stuffix.trim().length() == 0) ? "</font>"
: stuffix);
Scorer fragmentScorer = new QueryScorer(query);
Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
Fragmenter fragmenter = new SimpleFragmenter(fragmenterLength <= 0 ? 50
: fragmenterLength);
highlighter.setTextFragmenter(fragmenter);
return highlighter;
}

/**
* @Title: highlight
* @Description: 生成高亮文本
* @param document
*            索引文档对象
* @param highlighter
*            高亮器
* @param analyzer
*            索引分词器
* @param field
*            高亮字段
* @return
* @throws IOException
* @throws InvalidTokenOffsetsException
*/
public static String highlight(Document document, Highlighter highlighter,
Analyzer analyzer, String field) throws IOException {
List<IndexableField> list = document.getFields();
for (IndexableField fieldable : list) {
String fieldValue = fieldable.stringValue();
if (fieldable.name().equals(field)) {
try {
fieldValue = highlighter.getBestFragment(analyzer, field,
fieldValue);
} catch (InvalidTokenOffsetsException e) {
fieldValue = fieldable.stringValue();
}
return (fieldValue == null || fieldValue.trim().length() == 0) ? fieldable
.stringValue() : fieldValue;
}
}
return null;
}

/**
* @Title: searchTotalRecord
* @Description: 获取符合条件的总记录数
* @param query
* @return
* @throws IOException
*/
public static int searchTotalRecord(IndexSearcher search, Query query) {
ScoreDoc[] docs = null;
try {
TopDocs topDocs = search.search(query, Integer.MAX_VALUE);
if (topDocs == null || topDocs.scoreDocs == null
|| topDocs.scoreDocs.length == 0) {
return 0;
}
docs = topDocs.scoreDocs;
} catch (IOException e) {
e.printStackTrace();
}
return docs.length;
}

/**
* @Title: pageQuery
* @Description: Lucene分页查询
* @param searcher
* @param query
* @param page
* @throws IOException
*/
public static void pageQuery(IndexSearcher searcher, Directory directory,
Query query, Page<Document> page) {
int totalRecord = searchTotalRecord(searcher, query);
// 设置总记录数
page.setTotalRecord(totalRecord);
TopDocs topDocs = null;
try {
topDocs = searcher.searchAfter(page.getAfterDoc(), query,
page.getPageSize());
} catch (IOException e) {
e.printStackTrace();
}
List<Document> docList = new ArrayList<Document>();
ScoreDoc[] docs = topDocs.scoreDocs;
int index = 0;
for (ScoreDoc scoreDoc : docs) {
int docID = scoreDoc.doc;
Document document = null;
try {
document = searcher.doc(docID);
} catch (IOException e) {
e.printStackTrace();
}
if (index == docs.length - 1) {
page.setAfterDoc(scoreDoc);
page.setAfterDocId(docID);
}
docList.add(document);
index++;
}
page.setItems(docList);
closeIndexReader(searcher.getIndexReader());
}

/**
* @Title: pageQuery
* @Description: 分页查询[如果设置了高亮,则会更新索引文档]
* @param searcher
* @param directory
* @param query
* @param page
* @param highlighterParam
* @param writerConfig
* @throws IOException
*/
public static void pageQuery(IndexSearcher searcher, Directory directory,
Query query, Page<Document> page,
HighlighterParam highlighterParam, IndexWriterConfig writerConfig)
throws IOException {
IndexWriter writer = null;
// 若未设置高亮
if (null == highlighterParam || !highlighterParam.isHighlight()) {
pageQuery(searcher, directory, query, page);
} else {
int totalRecord = searchTotalRecord(searcher, query);
System.out.println("totalRecord:" + totalRecord);
// 设置总记录数
page.setTotalRecord(totalRecord);
TopDocs topDocs = searcher.searchAfter(page.getAfterDoc(), query,
page.getPageSize());
List<Document> docList = new ArrayList<Document>();
ScoreDoc[] docs = topDocs.scoreDocs;
int index = 0;
writer = getIndexWrtier(directory, writerConfig);
for (ScoreDoc scoreDoc : docs) {
int docID = scoreDoc.doc;
Document document = searcher.doc(docID);
String content = document.get(highlighterParam.getFieldName());
if (null != content && content.trim().length() > 0) {
// 创建高亮器
Highlighter highlighter = LuceneUtil.createHighlighter(
query, highlighterParam.getPrefix(),
highlighterParam.getStuffix(),
highlighterParam.getFragmenterLength());
String text = highlight(document, highlighter, analyzer,
highlighterParam.getFieldName());
// 若高亮后跟原始文本不相同,表示高亮成功
if (!text.equals(content)) {
Document tempdocument = new Document();
List<IndexableField> indexableFieldList = document
.getFields();
if (null != indexableFieldList
&& indexableFieldList.size() > 0) {
for (IndexableField field : indexableFieldList) {
if (field.name().equals(
highlighterParam.getFieldName())) {
tempdocument.add(new TextField(
field.name(), text, Store.YES));
} else {
tempdocument.add(field);
}
}
}
updateIndex(writer,
new Term(highlighterParam.getFieldName(),
content), tempdocument);
document = tempdocument;
}
}
if (index == docs.length - 1) {
page.setAfterDoc(scoreDoc);
page.setAfterDocId(docID);
}
docList.add(document);
index++;
}
page.setItems(docList);
}
closeIndexReader(searcher.getIndexReader());
closeIndexWriter(writer);
}

/**
* 创建QueryParser
*
* @return
*/
public static QueryParser createCustomQueryParser(String field,
Analyzer analyzer) {
return manager.createQueryParser(field, analyzer);
}

/**
* 创建多field的QueryParser
*
* @param fields
* @param analyzer
* @return
*/
public static QueryParser createMultiFieldQueryParser(String[] fields,
Analyzer analyzer) {
return manager.createMultiFieldQueryParser(fields, analyzer);
}

/**
* 可以在查询的时候获取TermVector
*
* @return
*/
public static Field storeVectorTextField(String name, String value,
Store store) {
FieldType type = new FieldType();
type.setStored(true);
type.setStoreTermVectors(true);
type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
return new Field(name, value, type);
}

}


package com.whf.pojo;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.CustomScoreProvider;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.search.Query;

/**
 * A CustomScoreQuery whose score for each matching document is the number of
 * distinct terms stored in that document's term vector for the configured
 * field (0 when the document stores no term vector for it).
 */
public class MyCountQuery extends CustomScoreQuery{

    // Field whose term vector is counted. Stays null when the single-argument
    // constructor is used; customScore would then fail to find a term vector.
    private String field=null;

    /** Per-segment provider that computes the term-count score. */
    private class MyCountQueryScoreProvider extends CustomScoreProvider{

        // Copy of the field name for this provider instance.
        private String field=null;

        public MyCountQueryScoreProvider(LeafReaderContext context) {
            super(context);
        }

        public MyCountQueryScoreProvider(LeafReaderContext context, String field) {
            super(context);
            this.field = field;
        }

        /**
         * Returns the number of distinct terms in {@code field}'s term vector
         * for document {@code arg0}. The sub-query score (arg1) and
         * value-source scores (arg2) are deliberately ignored.
         */
        @Override
        public float customScore(int arg0, float arg1, float[] arg2)
                throws IOException {
            IndexReader reader = context.reader();
            Terms tv = reader.getTermVector(arg0, field);
            TermsEnum termsEnum = null;
            int numTerms = 0;
            if (tv != null) {
                // Iterate the vector's terms; only the count matters.
                termsEnum = tv.iterator();
                while ((termsEnum.next()) != null) {
                    numTerms++;
                }
            }
            return (float) (numTerms);
        }

    }

    public MyCountQuery(Query subQuery) {
        super(subQuery);
    }

    public MyCountQuery(Query subQuery, String field) {
        super(subQuery);
        this.field = field;
    }

    /** Supplies the term-counting provider for each index segment. */
    @Override
    protected CustomScoreProvider getCustomScoreProvider(
            LeafReaderContext context) throws IOException {
        return new MyCountQueryScoreProvider(context, this.field);
    }

}


package com.whf.pojo;

/**
 * Parameter object describing how search-hit highlighting should be applied:
 * whether it is on, which field it targets, the surrounding markup, and the
 * maximum summary length.
 *
 * @author Lanxiaowei
 * @date 2014-3-30 下午12:22:08
 */
public class HighlighterParam {
    /** Whether highlighting is enabled. */
    private boolean highlight;
    /** Name of the field whose hits are highlighted. */
    private String fieldName;
    /** Markup inserted before each highlighted fragment. */
    private String prefix;
    /** Markup inserted after each highlighted fragment. */
    private String stuffix;
    /** Maximum length of the generated summary fragment. */
    private int fragmenterLength;

    public HighlighterParam() {
    }

    public HighlighterParam(boolean highlight, String fieldName, String prefix,
            String stuffix, int fragmenterLength) {
        this.highlight = highlight;
        this.fieldName = fieldName;
        this.prefix = prefix;
        this.stuffix = stuffix;
        this.fragmenterLength = fragmenterLength;
    }

    public HighlighterParam(boolean highlight, String fieldName,
            int fragmenterLength) {
        // Delegate to the full constructor; prefix/suffix stay null.
        this(highlight, fieldName, null, null, fragmenterLength);
    }

    public HighlighterParam(boolean highlight, String fieldName, String prefix,
            String stuffix) {
        // Delegate to the full constructor; fragmenterLength stays 0.
        this(highlight, fieldName, prefix, stuffix, 0);
    }

    public boolean isHighlight() {
        return highlight;
    }

    public void setHighlight(boolean highlight) {
        this.highlight = highlight;
    }

    public String getFieldName() {
        return fieldName;
    }

    public void setFieldName(String fieldName) {
        this.fieldName = fieldName;
    }

    public String getPrefix() {
        return prefix;
    }

    public void setPrefix(String prefix) {
        this.prefix = prefix;
    }

    public String getStuffix() {
        return stuffix;
    }

    public void setStuffix(String stuffix) {
        this.stuffix = stuffix;
    }

    public int getFragmenterLength() {
        return fragmenterLength;
    }

    public void setFragmenterLength(int fragmenterLength) {
        this.fragmenterLength = fragmenterLength;
    }
}


package com.whf.pojo;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.CustomScoreProvider;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.search.Query;

/**
 * A CustomScoreQuery whose score for each matching document is the number of
 * distinct terms stored in that document's term vector for the configured
 * field (0 when the document stores no term vector for it).
 *
 * NOTE(review): this is a byte-for-byte duplicate of the MyCountQuery class
 * that appears earlier in this source dump, in the same package
 * (com.whf.pojo). Two top-level classes with the same fully-qualified name
 * cannot coexist - this looks like a copy/paste or concatenation artifact;
 * one of the two copies should be removed.
 */
public class MyCountQuery extends CustomScoreQuery{

    // Field whose term vector is counted. Stays null when the single-argument
    // constructor is used; customScore would then fail to find a term vector.
    private String field=null;

    /** Per-segment provider that computes the term-count score. */
    private class MyCountQueryScoreProvider extends CustomScoreProvider{

        // Copy of the field name for this provider instance.
        private String field=null;

        public MyCountQueryScoreProvider(LeafReaderContext context) {
            super(context);
        }

        public MyCountQueryScoreProvider(LeafReaderContext context, String field) {
            super(context);
            this.field = field;
        }

        /**
         * Returns the number of distinct terms in {@code field}'s term vector
         * for document {@code arg0}. The sub-query score (arg1) and
         * value-source scores (arg2) are deliberately ignored.
         */
        @Override
        public float customScore(int arg0, float arg1, float[] arg2)
                throws IOException {
            IndexReader reader = context.reader();
            Terms tv = reader.getTermVector(arg0, field);
            TermsEnum termsEnum = null;
            int numTerms = 0;
            if (tv != null) {
                // Iterate the vector's terms; only the count matters.
                termsEnum = tv.iterator();
                while ((termsEnum.next()) != null) {
                    numTerms++;
                }
            }
            return (float) (numTerms);
        }

    }

    public MyCountQuery(Query subQuery) {
        super(subQuery);
    }

    public MyCountQuery(Query subQuery, String field) {
        super(subQuery);
        this.field = field;
    }

    /** Supplies the term-counting provider for each index segment. */
    @Override
    protected CustomScoreProvider getCustomScoreProvider(
            LeafReaderContext context) throws IOException {
        return new MyCountQueryScoreProvider(context, this.field);
    }

}


package com.whf.pojo;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.ScoreDoc;

/**
 * Generic pagination holder: tracks the current page, page size, totals and
 * the window of page-number links to display, plus Lucene searchAfter state
 * (the last ScoreDoc of the previous page) for cursor-style paging.
 */
public class Page<T> {
    /** Current page number (1-based). */
    private int currentPage;
    /** Number of items per page. */
    private int pageSize;
    /** Total number of records. */
    private int totalRecord;
    /** Total number of pages. */
    private int totalPage;
    /** Items of the current page (element type bounded by the generic T). */
    private Collection<T> items;
    /** Start index of the displayed page-link window (0-based). */
    private int startIndex;
    /** End index of the displayed page-link window (0-based). */
    private int endIndex;
    /** Max page links shown per group (e.g. Google shows at most 10). */
    private int groupSize;
    /** Offset to the left of the current page inside the group. */
    private int leftOffset = 5;
    /** Offset to the right of the current page inside the group. */
    private int rightOffset = 4;
    /** Page-number labels currently displayed. */
    private String[] pageRange;
    /** Documents of the current page. */
    private List<Document> docList;
    /** Last ScoreDoc of the previous page (cursor for searchAfter). */
    private ScoreDoc afterDoc;
    /** Document id of the last ScoreDoc of the previous page. */
    private int afterDocId;

    /**
     * Recomputes startIndex/endIndex - the window of page links to display -
     * from groupSize, totalPage, currentPage and the left/right offsets.
     */
    public void setRangeIndex() {
        int groupSize = getGroupSize();
        int totalPage = getTotalPage();
        if (totalPage < 2) {
            // NOTE(review): with totalPage 0 or 1 this sets endIndex to
            // totalPage (0-based), giving a window of totalPage+1 entries;
            // getPageRange() special-cases totalPage == 1 but not 0 - verify
            // the zero-page case is intended.
            startIndex = 0;
            endIndex = totalPage - startIndex;
        } else {
            int currentPage = getCurrentPage();
            if (groupSize >= totalPage) {
                // Every page fits in one group: show them all.
                startIndex = 0;
                endIndex = totalPage - startIndex - 1;
            } else {
                int leftOffset = getLeftOffset();
                int middleOffset = getMiddleOffset();
                if (-1 == middleOffset) {
                    startIndex = 0;
                    endIndex = groupSize - 1;
                } else if (currentPage <= leftOffset) {
                    // Current page is near the start: pin window to the left.
                    startIndex = 0;
                    endIndex = groupSize - 1;
                } else {
                    // Center the window on the current page, clamped at the end.
                    startIndex = currentPage - leftOffset - 1;
                    if (currentPage + rightOffset > totalPage) {
                        endIndex = totalPage - 1;
                    } else {
                        endIndex = currentPage + rightOffset - 1;
                    }
                }
            }
        }
    }

    /** Returns the current page, clamped into [1, totalPage]. */
    public int getCurrentPage() {
        if (currentPage <= 0) {
            currentPage = 1;
        } else {
            int totalPage = getTotalPage();
            if (totalPage > 0 && currentPage > getTotalPage()) {
                currentPage = totalPage;
            }
        }
        return currentPage;
    }

    public void setCurrentPage(int currentPage) {
        this.currentPage = currentPage;
    }

    /** Returns the page size, defaulting to 10 when unset or non-positive. */
    public int getPageSize() {
        if (pageSize <= 0) {
            pageSize = 10;
        }
        return pageSize;
    }

    public void setPageSize(int pageSize) {
        this.pageSize = pageSize;
    }

    public int getTotalRecord() {
        return totalRecord;
    }

    public void setTotalRecord(int totalRecord) {
        this.totalRecord = totalRecord;
    }

    /** Derives (and caches) the total page count from totalRecord/pageSize. */
    public int getTotalPage() {
        int totalRecord = getTotalRecord();
        if (totalRecord == 0) {
            totalPage = 0;
        } else {
            int pageSize = getPageSize();
            // Ceiling division: partial last page still counts as a page.
            totalPage = totalRecord % pageSize == 0 ? totalRecord / pageSize
                    : (totalRecord / pageSize) + 1;
        }
        return totalPage;
    }

    public void setTotalPage(int totalPage) {
        this.totalPage = totalPage;
    }

    public int getStartIndex() {
        return startIndex;
    }

    public void setStartIndex(int startIndex) {
        this.startIndex = startIndex;
    }

    public int getEndIndex() {
        return endIndex;
    }

    public void setEndIndex(int endIndex) {
        this.endIndex = endIndex;
    }

    /** Returns the group size, defaulting to 10 when unset or non-positive. */
    public int getGroupSize() {
        if (groupSize <= 0) {
            groupSize = 10;
        }
        return groupSize;
    }

    public void setGroupSize(int groupSize) {
        this.groupSize = groupSize;
    }

    /**
     * Left offset derived from the group size (half of it); note this
     * overwrites any value passed to setLeftOffset.
     */
    public int getLeftOffset() {
        leftOffset = getGroupSize() / 2;
        return leftOffset;
    }

    public void setLeftOffset(int leftOffset) {
        this.leftOffset = leftOffset;
    }

    /**
     * Right offset derived from the group size: half, minus one when the
     * group size is even (so left + right + current == groupSize).
     */
    public int getRightOffset() {
        int groupSize = getGroupSize();
        if (groupSize % 2 == 0) {
            rightOffset = (groupSize / 2) - 1;
        } else {
            rightOffset = groupSize / 2;
        }
        return rightOffset;
    }

    public void setRightOffset(int rightOffset) {
        this.rightOffset = rightOffset;
    }

    /**
     * Index of the window's center position (1-based), or -1 when every page
     * fits into a single group and no centering is needed.
     */
    public int getMiddleOffset() {
        int groupSize = getGroupSize();
        int totalPage = getTotalPage();
        if (groupSize >= totalPage) {
            return -1;
        }
        return getLeftOffset() + 1;
    }

    /**
     * Returns the page-number labels to display, recomputing the window
     * first. Labels are 1-based page numbers as strings.
     */
    public String[] getPageRange() {
        setRangeIndex();
        int size = endIndex - startIndex + 1;
        if (size <= 0) {
            return new String[0];
        }
        if (totalPage == 1) {
            return new String[] { "1" };
        }
        // NOTE(review): when totalPage == 0 the branch above computes size 1
        // and this returns {"1"} even though there are no pages - confirm.
        pageRange = new String[size];
        for (int i = 0; i < size; i++) {
            pageRange[i] = (startIndex + i + 1) + "";
        }
        return pageRange;
    }

    public void setPageRange(String[] pageRange) {
        this.pageRange = pageRange;
    }

    public Collection<T> getItems() {
        return items;
    }

    public void setItems(Collection<T> items) {
        this.items = items;
    }

    public List<Document> getDocList() {
        return docList;
    }

    public void setDocList(List<Document> docList) {
        this.docList = docList;
    }

    /**
     * Returns the searchAfter cursor; as a side effect re-runs
     * setAfterDocId so a cursor is materialized from afterDocId when only
     * the id was set.
     */
    public ScoreDoc getAfterDoc() {
        setAfterDocId(afterDocId);
        return afterDoc;
    }

    public void setAfterDoc(ScoreDoc afterDoc) {
        this.afterDoc = afterDoc;
    }

    public int getAfterDocId() {
        return afterDocId;
    }

    /**
     * Sets the cursor document id; when no ScoreDoc cursor exists yet, a
     * synthetic one (score 1.0) is created from the id.
     */
    public void setAfterDocId(int afterDocId) {
        this.afterDocId = afterDocId;
        if (null == afterDoc) {
            this.afterDoc = new ScoreDoc(afterDocId, 1.0f);
        }
    }

    public Page() {
    }

    public Page(int currentPage, int pageSize) {
        this.currentPage = currentPage;
        this.pageSize = pageSize;
    }

    public Page(int currentPage, int pageSize, Collection<T> items) {
        this.currentPage = currentPage;
        this.pageSize = pageSize;
        this.items = items;
    }

    public Page(int currentPage, int pageSize, Collection<T> items,
            int groupSize) {
        this.currentPage = currentPage;
        this.pageSize = pageSize;
        this.items = items;
        this.groupSize = groupSize;
    }

    public Page(int currentPage, int pageSize, int groupSize, int afterDocId) {
        this.currentPage = currentPage;
        this.pageSize = pageSize;
        this.groupSize = groupSize;
        this.afterDocId = afterDocId;
    }

    /** Manual smoke test: prints the page-link window for every page. */
    public static void main(String[] args) {
        Collection<Integer> items = new ArrayList<Integer>();
        int totalRecord = 201;
        for (int i = 0; i < totalRecord; i++) {
            items.add(new Integer(i));
        }
        Page<Integer> page = new Page<Integer>(1, 10, items, 10);
        page.setTotalRecord(totalRecord);
        int totalPage = page.getTotalPage();
        for (int i = 0; i < totalPage; i++) {
            page.setCurrentPage(i + 1);
            String[] pageRange = page.getPageRange();
            System.out.println("当前第" + page.currentPage + "页");
            for (int j = 0; j < pageRange.length; j++) {
                System.out.print(pageRange[j] + "  ");
            }
            System.out.println("\n");
        }
    }
}


package com.whf.demo;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.search.suggest.document.SuggestField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import com.whf.pojo.MyCountQuery;

/**
 * Demo driver for the LuceneUtil/LuceneManager wrappers. The whole body of
 * main is commented out; it is kept as a usage example showing how to build
 * an index with term vectors and search it with the custom MyCountQuery.
 */
public class Main {

    // Filesystem location of the index used by the commented-out demo below.
    public static final String path = "F://index_data";

    /**
     * Entry point (currently a no-op; the demo code below is disabled).
     *
     * @param args unused
     * @throws IOException declared for the disabled indexing/search demo
     * @throws ParseException declared for the disabled query-parsing demo
     */
    public static void main(String[] args) throws IOException, ParseException {

//      Directory directory = LuceneUtil.openFSDirectory(path);
//      Analyzer analyzer = new StandardAnalyzer();
//      IndexWriterConfig config = new IndexWriterConfig(
//              analyzer);
//      // Larger RAM buffer improves indexing throughput (default 16MB);
//      // takes effect for subsequent inserts.
//      config.setRAMBufferSizeMB(256);
//      // Number of documents buffered in memory before flushing to disk.
//      config.setMaxBufferedDocs(200);

//      IndexWriter writer = LuceneUtil.getIndexWrtier(directory, config);
//      try {
//
//          Document doc1 = new Document();
//          doc1.add(LuceneUtil.storeVectorTextField("name", "this is a demo", Store.YES));
//          doc1.add(LuceneUtil.storeVectorTextField("desc", "whf", Store.YES));
//
//          Document doc2 = new Document();
//          doc2.add(LuceneUtil.storeVectorTextField("name", "this is a test", Store.YES));
//          doc2.add(LuceneUtil.storeVectorTextField("desc", "smx", Store.YES));
//
////            writer.updateDocument(new Term("name", "very"), doc);
//          writer.addDocument(doc1);
//          writer.addDocument(doc2);
//          // Force-merge segments to speed up subsequent searches.
//          writer.forceMerge(5);
//      } catch (Exception e) {
//          e.printStackTrace();
//      } finally {
//          writer.commit();
//          writer.close();
//      }
//      IndexReader reader = LuceneUtil.getIndexReader(directory);
//      IndexSearcher searcher = LuceneUtil.getIndexSearcher(reader);
//      QueryParser parser = new MultiFieldQueryParser(new String[] { "name", "desc" }, analyzer);
//      Query query = new MatchAllDocsQuery();
//      Query myquery=new MyCountQuery(query, "name");
//      TopDocs tops = searcher.search(myquery, 100);
//      System.out.println("*****************");
////        System.out.println(tops.totalHits);
//      ScoreDoc scoreDoc[] = tops.scoreDocs;
//      for (int i = 0; i < scoreDoc.length; i++) {
//          System.out.println(scoreDoc[i].doc);
////            Document doc = searcher.doc(scoreDoc[i].doc);
////            System.out.println(scoreDoc[i].score);
////            System.out.println(doc.getFields());
////            for(IndexableField field:doc){
////                System.out.println(field.name()+" "+field.stringValue());
////            }
//      }

    }

    // Other query types worth exploring:
    // TermQuery、BooleanQuery、PhraseQuery、PrefixQuery、
    // RangeQuery、MultiTermQuery、FilteredQuery、SpanQuery
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: