
Lucene 6 Tutorial (7): Multi-Directory, Multi-Threaded Indexing and Searching

When working with Lucene 6, you will run into situations that call for multi-directory, multi-threaded index building and retrieval (querying).

Putting data of different types into one and the same index file is unrealistic. Moreover, apart from in-memory indexing, splitting the index across multiple directories also speeds up indexing: an index directory can hold only one write lock, so if there are multiple index directories, each directory gets its own lock, and it becomes worthwhile for N index directories to carry out index writes at the same time.
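To make the one-lock-per-directory point concrete, here is a minimal standalone sketch (the paths index/dir1 and index/dir2 are hypothetical placeholders): writers opened on different directories coexist without trouble, while a second writer on the same directory would fail with LockObtainFailedException.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;

public class PerDirectoryLockDemo {
    public static void main(String[] args) throws Exception {
        // Two hypothetical index directories; each carries its own write.lock.
        FSDirectory dir1 = FSDirectory.open(Paths.get("index/dir1"));
        FSDirectory dir2 = FSDirectory.open(Paths.get("index/dir2"));
        // One IndexWriter per directory can be open at the same time without conflict.
        IndexWriter writer1 = new IndexWriter(dir1, new IndexWriterConfig(new StandardAnalyzer()));
        IndexWriter writer2 = new IndexWriter(dir2, new IndexWriterConfig(new StandardAnalyzer()));
        // A second writer on the SAME directory would throw LockObtainFailedException:
        // new IndexWriter(dir1, new IndexWriterConfig(new StandardAnalyzer()));
        writer1.close();
        writer2.close();
        dir1.close();
        dir2.close();
    }
}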

On the retrieval side, multi-threaded concurrent queries are likewise needed to speed up searching.

That is where the multi-directory, multi-threaded approach comes in. This article mainly follows solarrrr's blog post at:

http://blog.csdn.net/asdfsadfasdfsa/article/details/77649108

All of the classes below need to be created before anything can be run.

The code follows. First is the LuceneUtils utility class, which wraps the various Lucene objects and lays the groundwork for avoiding read and write conflicts during indexing and searching.

import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutorService;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
* Lucene工具类(基于Lucene6封装)
* @author Lanxiaowei
*
*/
public class LuceneUtils {
private static final LuceneManager luceneManager = LuceneManager.getInstance();
//public static Analyzer analyzer = new AnsjAnalyzer(true);
public static Analyzer analyzer = new StandardAnalyzer();
/**
* 打开索引目录
*
* @param luceneDir
* @return
* @throws IOException
*/
public static FSDirectory openFSDirectory(String luceneDir) {
FSDirectory directory = null;
try {
directory = FSDirectory.open(Paths.get(luceneDir));
/**
* 注意:isLocked方法内部会试图去获取Lock,如果获取到Lock,会关闭它,否则return false表示索引目录没有被锁,
* 这也就是为什么unlock方法被从IndexWriter类中移除的原因
*/
IndexWriter.isLocked(directory);
} catch (IOException e) {
e.printStackTrace();
}
return directory;
}

/**
* 关闭索引目录并销毁
* @param directory
* @throws IOException
*/
public static void closeDirectory(Directory directory) throws IOException {
if (null != directory) {
directory.close();
directory = null;
}
}

/**
* 获取IndexWriter
* @param dir
* @param config
* @return
*/
public static IndexWriter getIndexWriter(Directory dir, IndexWriterConfig config) {
return luceneManager.getIndexWriter(dir, config);
}

/**
* 获取IndexWriter
* @param directoryPath
* @param config
* @return
*/
public static IndexWriter getIndexWriter(String directoryPath, IndexWriterConfig config) {
FSDirectory directory = openFSDirectory(directoryPath);
return luceneManager.getIndexWriter(directory, config);
}

/**
* 获取IndexReader
* @param dir
* @param enableNRTReader  是否开启NRTReader
* @return
*/
public static IndexReader getIndexReader(Directory dir,boolean enableNRTReader) {
return luceneManager.getIndexReader(dir, enableNRTReader);
}

/**
* 获取IndexReader(默认不启用NRTReader)
* @param dir
* @return
*/
public static IndexReader getIndexReader(Directory dir) {
return luceneManager.getIndexReader(dir);
}

/**
* 获取IndexSearcher
* @param reader    IndexReader对象
* @param executor  如果你需要开启多线程查询,请提供ExecutorService对象参数
* @return
*/
public static IndexSearcher getIndexSearcher(IndexReader reader,ExecutorService executor) {
return luceneManager.getIndexSearcher(reader, executor);
}

/**
* 获取IndexSearcher(不支持多线程查询)
* @param reader    IndexReader对象
* @return
*/
public static IndexSearcher getIndexSearcher(IndexReader reader) {
return luceneManager.getIndexSearcher(reader);
}

/**
* 创建QueryParser对象
* @param field
* @param analyzer
* @return
*/
public static QueryParser createQueryParser(String field, Analyzer analyzer) {
return new QueryParser(field, analyzer);
}

/**
* 关闭IndexReader
* @param reader
*/
public static void closeIndexReader(IndexReader reader) {
if (null != reader) {
try {
reader.close();
reader = null;
} catch (IOException e) {
e.printStackTrace();
}
}
}

/**
* 关闭IndexWriter
* @param writer
*/
public static void closeIndexWriter(IndexWriter writer) {
luceneManager.closeIndexWriter(writer);
}

/**
* 关闭IndexReader和IndexWriter
* @param reader
* @param writer
*/
public static void closeAll(IndexReader reader, IndexWriter writer) {
closeIndexReader(reader);
closeIndexWriter(writer);
}

/**
* 删除索引[注意:请自己关闭IndexWriter对象]
* @param writer
* @param field
* @param value
*/
public static void deleteIndex(IndexWriter writer, String field, String value) {
try {
writer.deleteDocuments(new Term[] {new Term(field,value)});
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 删除索引[注意:请自己关闭IndexWriter对象]
* @param writer
* @param query
*/
public static void deleteIndex(IndexWriter writer, Query query) {
try {
writer.deleteDocuments(query);
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 批量删除索引[注意:请自己关闭IndexWriter对象]
* @param writer
* @param terms
*/
public static void deleteIndexs(IndexWriter writer,Term[] terms) {
try {
writer.deleteDocuments(terms);
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 批量删除索引[注意:请自己关闭IndexWriter对象]
* @param writer
* @param querys
*/
public static void deleteIndexs(IndexWriter writer,Query[] querys) {
try {
writer.deleteDocuments(querys);
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 删除所有索引文档
* @param writer
*/
public static void deleteAllIndex(IndexWriter writer) {
try {
writer.deleteAll();
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 更新索引文档
* @param writer
* @param term
* @param document
*/
public static void updateIndex(IndexWriter writer,Term term,Document document) {
try {
writer.updateDocument(term, document);
} catch (IOException e) {
e.printStackTrace();
}
}

/**
* 更新索引文档
* @param writer
* @param field
* @param value
* @param document
*/
public static void updateIndex(IndexWriter writer,String field,String value,Document document) {
updateIndex(writer, new Term(field, value), document);
}

/**
* 添加索引文档
* @param writer
* @param document
*/
public static void addIndex(IndexWriter writer, Document document) {
updateIndex(writer, null, document);
}

/**
* 索引文档查询
* @param searcher
* @param query
* @return
*/
public static List<Document> query(IndexSearcher searcher,Query query) {
TopDocs topDocs = null;
try {
topDocs = searcher.search(query, Integer.MAX_VALUE);
} catch (IOException e) {
e.printStackTrace();
}
ScoreDoc[] scores = topDocs.scoreDocs;
int length = scores.length;
if (length <= 0) {
return Collections.emptyList();
}
List<Document> docList = new ArrayList<Document>();
try {
for (int i = 0; i < length; i++) {
Document doc = searcher.doc(scores[i].doc);
docList.add(doc);
}
} catch (IOException e) {
e.printStackTrace();
}
return docList;
}

public static List<Document> queryp(IndexSearcher searcher,  Query query) {
TopDocs topDocs = null;
try {
topDocs = searcher.search(query, Integer.MAX_VALUE);
} catch (IOException e) {
e.printStackTrace();
}
ScoreDoc[] scores = topDocs.scoreDocs;
int length = scores.length;
if (length <= 0) {
return Collections.emptyList();
}
List<Document> docList = new ArrayList<Document>();
try {
for (int i = 0; i < length; i++) {
Document doc = searcher.doc(scores[i].doc);
docList.add(doc);
}
} catch (IOException e) {
e.printStackTrace();
}
return docList;
}

/**
* 返回索引文档的总数[注意:请自己手动关闭IndexReader]
* @param reader
* @return
*/
public static int getIndexTotalCount(IndexReader reader) {
return reader.numDocs();
}

/**
* 返回索引文档中最大文档ID[注意:请自己手动关闭IndexReader]
* @param reader
* @return
*/
public static int getMaxDocId(IndexReader reader) {
return reader.maxDoc();
}

/**
* 返回已经删除尚未提交的文档总数[注意:请自己手动关闭IndexReader]
* @param reader
* @return
*/
public static int getDeletedDocNum(IndexReader reader) {
return getMaxDocId(reader) - getIndexTotalCount(reader);
}

/**
* 根据docId查询索引文档
* @param reader         IndexReader对象
* @param docID          documentId
* @param fieldsToLoad   需要返回的field
* @return
*/
public static Document findDocumentByDocId(IndexReader reader,int docID, Set<String> fieldsToLoad) {
try {
return reader.document(docID, fieldsToLoad);
} catch (IOException e) {
return null;
}
}

/**
* 根据docId查询索引文档
* @param reader         IndexReader对象
* @param docID          documentId
* @return
*/
public static Document findDocumentByDocId(IndexReader reader,int docID) {
return findDocumentByDocId(reader, docID, null);
}

/**
* @Title: createHighlighter
* @Description: 创建高亮器
* @param query             索引查询对象
* @param prefix            高亮前缀字符串
* @param stuffix           高亮后缀字符串
* @param fragmenterLength  摘要最大长度
* @return
*/
public static Highlighter createHighlighter(Query query, String prefix, String stuffix, int fragmenterLength) {
Formatter formatter = new SimpleHTMLFormatter((prefix == null || prefix.trim().length() == 0) ?
"<font color=\"red\">" : prefix, (stuffix == null || stuffix.trim().length() == 0)?"</font>" : stuffix);
Scorer fragmentScorer = new QueryScorer(query);
Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
Fragmenter fragmenter = new SimpleFragmenter(fragmenterLength <= 0 ? 50 : fragmenterLength);
highlighter.setTextFragmenter(fragmenter);
return highlighter;
}

/**
* @Title: highlight
* @Description: 生成高亮文本
* @param document          索引文档对象
* @param highlighter       高亮器
* @param analyzer          索引分词器
* @param field             高亮字段
* @return
* @throws IOException
* @throws InvalidTokenOffsetsException
*/
public static String highlight(Document document,Highlighter highlighter,Analyzer analyzer,String field) throws IOException {
List<IndexableField> list = document.getFields();
for (IndexableField fieldable : list) {
String fieldValue = fieldable.stringValue();
if(fieldable.name().equals(field)) {
try {
fieldValue = highlighter.getBestFragment(analyzer, field, fieldValue);
} catch (InvalidTokenOffsetsException e) {
fieldValue = fieldable.stringValue();
}
return (fieldValue == null || fieldValue.trim().length() == 0)? fieldable.stringValue() : fieldValue;
}
}
return null;
}

/**
* @Title: searchTotalRecord
* @Description: 获取符合条件的总记录数
* @param query
* @return
* @throws IOException
*/
public static int searchTotalRecord(IndexSearcher search,Query query) {
ScoreDoc[] docs = null;
try {
TopDocs topDocs = search.search(query, Integer.MAX_VALUE);
if(topDocs == null || topDocs.scoreDocs == null || topDocs.scoreDocs.length == 0) {
return 0;
}
docs = topDocs.scoreDocs;
} catch (IOException e) {
e.printStackTrace();
}
return null == docs ? 0 : docs.length;
}

/**
* @Title: pageQuery
* @Description: Lucene分页查询
* @param searcher
* @param query
* @param page
* @throws IOException
*/

public static void pageQuery(IndexSearcher searcher,Directory directory,Query query,Page<Document> page) {
int totalRecord = searchTotalRecord(searcher,query);
//设置总记录数
page.setTotalRecord(totalRecord);
TopDocs topDocs = null;
try {
topDocs = searcher.searchAfter(page.getAfterDoc(),query, page.getPageSize());
} catch (IOException e) {
e.printStackTrace();
}
List<Document> docList = new ArrayList<Document>();
ScoreDoc[] docs = topDocs.scoreDocs;
int index = 0;
for (ScoreDoc scoreDoc : docs) {
int docID = scoreDoc.doc;
Document document = null;
try {
document = searcher.doc(docID);
} catch (IOException e) {
e.printStackTrace();
}
if(index == docs.length - 1) {
page.setAfterDoc(scoreDoc);
page.setAfterDocId(docID);
}
docList.add(document);
index++;
}
page.setItems(docList);
closeIndexReader(searcher.getIndexReader());
}

/**
* @Title: pageQuery
* @Description: 分页查询[如果设置了高亮,则会更新索引文档]
* @param searcher
* @param directory
* @param query
* @param page
* @param highlighterParam
* @param writerConfig
* @throws IOException
*/

public static void pageQuery(IndexSearcher searcher,Directory directory,Query query,Page<Document> page,HighlighterParam highlighterParam,IndexWriterConfig writerConfig) throws IOException {
IndexWriter writer = null;
//若未设置高亮
if(null == highlighterParam || !highlighterParam.isHighlight()) {
pageQuery(searcher,directory,query, page);
} else {
int totalRecord = searchTotalRecord(searcher,query);
System.out.println("totalRecord:" + totalRecord);
//设置总记录数
page.setTotalRecord(totalRecord);
TopDocs topDocs = searcher.searchAfter(page.getAfterDoc(),query, page.getPageSize());
List<Document> docList = new ArrayList<Document>();
ScoreDoc[] docs = topDocs.scoreDocs;
int index = 0;
writer = getIndexWriter(directory, writerConfig);
for (ScoreDoc scoreDoc : docs) {
int docID = scoreDoc.doc;
Document document = searcher.doc(docID);
String content = document.get(highlighterParam.getFieldName());
if(null != content && content.trim().length() > 0) {
//创建高亮器
Highlighter highlighter = LuceneUtils.createHighlighter(query,
highlighterParam.getPrefix(), highlighterParam.getStuffix(),
highlighterParam.getFragmenterLength());
String text = highlight(document, highlighter, analyzer, highlighterParam.getFieldName());
//若高亮后跟原始文本不相同,表示高亮成功
if(!text.equals(content)) {
Document tempdocument = new Document();
List<IndexableField> indexableFieldList = document.getFields();
if(null != indexableFieldList && indexableFieldList.size() > 0) {
for(IndexableField field : indexableFieldList) {
if(field.name().equals(highlighterParam.getFieldName())) {
tempdocument.add(new TextField(field.name(), text, Field.Store.YES));
} else {
tempdocument.add(field);
}
}
}
updateIndex(writer, new Term(highlighterParam.getFieldName(),content), tempdocument);
document = tempdocument;
}
}
if(index == docs.length - 1) {
page.setAfterDoc(scoreDoc);
page.setAfterDocId(docID);
}
docList.add(document);
index++;
}
page.setItems(docList);
}
closeIndexReader(searcher.getIndexReader());
closeIndexWriter(writer);
}

}
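A minimal sketch of how the utility class above is meant to be used (the path index/demo, the field name contents and the query text are placeholder values, and the LuceneManager class shown further below is assumed to be on the same classpath):

import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.FSDirectory;

public class LuceneUtilsDemo {
    public static void main(String[] args) throws Exception {
        // "index/demo" is a placeholder; point it at a real index directory.
        FSDirectory directory = LuceneUtils.openFSDirectory("index/demo");
        IndexWriter writer = LuceneUtils.getIndexWriter(directory, new IndexWriterConfig(LuceneUtils.analyzer));

        Document doc = new Document();
        doc.add(new TextField("contents", "hello lucene", Field.Store.YES));
        LuceneUtils.addIndex(writer, doc);   // delegates to updateDocument(null, doc), i.e. a plain add
        writer.commit();
        LuceneUtils.closeIndexWriter(writer);

        IndexReader reader = LuceneUtils.getIndexReader(directory);
        IndexSearcher searcher = LuceneUtils.getIndexSearcher(reader);
        Query query = LuceneUtils.createQueryParser("contents", LuceneUtils.analyzer).parse("hello");
        List<Document> hits = LuceneUtils.query(searcher, query);
        System.out.println("hits: " + hits.size());
        LuceneUtils.closeIndexReader(reader);
        LuceneUtils.closeDirectory(directory);
    }
}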


Next is the LuceneManager class, a singleton that hands out IndexReader/IndexSearcher instances and caches one IndexWriter per thread in a ThreadLocal.

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;

/**
* Lucene索引读写器/查询器单例获取工具类
* @author Lanxiaowei
*
*/
public class LuceneManager {

private volatile static LuceneManager singleton;
private volatile static IndexWriter writer;
private volatile static IndexReader reader;
private volatile static IndexSearcher searcher;

private final Lock writerLock = new ReentrantLock();
//private final Lock readerLock = new ReentrantLock();
//private final Lock searcherLock = new ReentrantLock();

private static ThreadLocal<IndexWriter> writerLocal = new ThreadLocal<IndexWriter>();

private LuceneManager() {}

public static LuceneManager getInstance() {
if (null == singleton) {
synchronized (LuceneManager.class) {
if (null == singleton) {
singleton = new LuceneManager();
}
}
}
return singleton;
}

/**
* 获取IndexWriter单例对象
* @param dir
* @param config
* @return
*/
public IndexWriter getIndexWriter(Directory dir, IndexWriterConfig config) {
if(null == dir) {
throw new IllegalArgumentException("Directory can not be null.");
}
if(null == config) {
throw new IllegalArgumentException("IndexWriterConfig can not be null.");
}
try {
writerLock.lock();
writer = writerLocal.get();
if(null != writer) {
return writer;
}
if(null == writer){            //如果索引目录被锁,则直接抛异常
if(IndexWriter.isLocked(dir)) {
throw new LockObtainFailedException("Directory of index had been locked.");
}
writer = new IndexWriter(dir, config);
writerLocal.set(writer);
}
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
writerLock.unlock();
}
return writer;
}

/**
* 获取IndexWriter[可能为Null]
* @return
*/
public IndexWriter getIndexWriter() {
return writer;
}

/**
* 获取IndexReader对象
* @param dir
* @param enableNRTReader  是否开启NRTReader
* @return
*/
public IndexReader getIndexReader(Directory dir,boolean enableNRTReader) {
if(null == dir) {
throw new IllegalArgumentException("Directory can not be null.");
}
try {
if(null == reader){
reader = DirectoryReader.open(dir);
} else {
if(enableNRTReader && reader instanceof DirectoryReader) {
//开启近实时Reader,能立即看到动态添加/删除的索引变化
reader = DirectoryReader.openIfChanged((DirectoryReader)reader);
}
}
} catch (IOException e) {
e.printStackTrace();
}
return reader;
}

/**
* 获取IndexReader对象(默认不启用NRTReader)
* @param dir
* @return
*/
public IndexReader getIndexReader(Directory dir) {
return getIndexReader(dir, false);
}

/**
* 获取IndexSearcher对象
* @param reader    IndexReader对象实例
* @param executor  如果你需要开启多线程查询,请提供ExecutorService对象参数
* @return
*/
public IndexSearcher getIndexSearcher(IndexReader reader,ExecutorService executor) {
if(null == reader) {
throw new IllegalArgumentException("The indexReader can not be null.");
}
if(null == searcher){
//pass the ExecutorService through so queries can actually run across multiple threads
searcher = (null == executor) ? new IndexSearcher(reader) : new IndexSearcher(reader, executor);
}
return searcher;
}

/**
* 获取IndexSearcher对象(不支持多线程查询)
* @param reader    IndexReader对象实例
* @return
*/
public IndexSearcher getIndexSearcher(IndexReader reader) {
return getIndexSearcher(reader, null);
}

/**
* 关闭IndexWriter
* @param writer
*/
public void closeIndexWriter(IndexWriter writer) {
if(null != writer) {
try {
writer.close();
writer = null;
writerLocal.remove();
} catch (IOException e) {
e.printStackTrace();
}
}
}


}
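Two details of this class are worth calling out: getInstance() uses double-checked locking to keep LuceneManager a singleton, and the IndexWriter handed out by getIndexWriter() is cached in a ThreadLocal, so each thread reuses its own writer instance. A small sketch under those assumptions (the path index/dir1 is a placeholder):

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;

public class LuceneManagerDemo {
    public static void main(String[] args) {
        LuceneManager manager = LuceneManager.getInstance();
        // "index/dir1" is a placeholder path.
        FSDirectory dir = LuceneUtils.openFSDirectory("index/dir1");

        // The first call on this thread opens the writer; the second call returns the
        // same instance because it is cached in the ThreadLocal for this thread.
        IndexWriter first = manager.getIndexWriter(dir, new IndexWriterConfig(LuceneUtils.analyzer));
        IndexWriter second = manager.getIndexWriter(dir, new IndexWriterConfig(LuceneUtils.analyzer));
        System.out.println(first == second);   // true

        // closeIndexWriter() closes the writer and removes it from the ThreadLocal.
        manager.closeIndexWriter(first);
    }
}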

Next is IndexCreator, the multi-threaded indexing class; each thread writes into its own index directory, which guards against write conflicts.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.concurrent.CountDownLatch;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.*;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;

/**
* 索引创建线程
* @author Lanxiaowei
*
*/
public class IndexCreator implements Runnable {
/**需要读取的文件存放目录*/
private String docPath;
/**索引文件存放目录*/
private String luceneDir;

private int threadCount;

private final CountDownLatch countDownLatch1;

private final CountDownLatch countDownLatch2;

public IndexCreator(String docPath, String luceneDir,int threadCount,CountDownLatch countDownLatch1,CountDownLatch countDownLatch2) {
super();
this.docPath = docPath;
this.luceneDir = luceneDir;
this.threadCount = threadCount;
this.countDownLatch1 = countDownLatch1;
this.countDownLatch2 = countDownLatch2;
}

public void run() {
IndexWriter writer = null;
try {
countDownLatch1.await();
Analyzer analyzer = LuceneUtils.analyzer;
FSDirectory directory = LuceneUtils.openFSDirectory(luceneDir);
IndexWriterConfig config = new IndexWriterConfig(analyzer);
config.setOpenMode(OpenMode.CREATE_OR_APPEND);
writer = LuceneUtils.getIndexWriter(directory, config);
try {
indexDocs(writer, Paths.get(docPath));
} catch (IOException e) {
e.printStackTrace();
}
} catch (InterruptedException e1) {
e1.printStackTrace();
} finally {
LuceneUtils.closeIndexWriter(writer);
countDownLatch2.countDown();
}
}

/**
*
* @param writer
*            索引写入器
* @param path
*            文件路径
* @throws IOException
*/
public static void indexDocs(final IndexWriter writer, Path path)
throws IOException {
// 如果是目录,查找目录下的文件
if (Files.isDirectory(path, new LinkOption[0])) {
System.out.println("directory");
Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult visitFile(Path file,
BasicFileAttributes attrs) throws IOException {
System.out.println(file.getFileName());
indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
return FileVisitResult.CONTINUE;
}
});
} else {
indexDoc(writer, path,
Files.getLastModifiedTime(path, new LinkOption[0])
.toMillis());
}
}

/**
* 读取文件创建索引
*
* @param writer
*            索引写入器
* @param file
*            文件路径
* @param lastModified
*            文件最后一次修改时间
* @throws IOException
*/
public static void indexDoc(IndexWriter writer, Path file, long lastModified)
throws IOException {
InputStream stream = Files.newInputStream(file, new OpenOption[0]);
Document doc = new Document();

Field pathField = new StringField("path", file.toString(),
Field.Store.YES);
doc.add(pathField);

doc.add(new NumericDocValuesField("modified", lastModified));
doc.add(new LongPoint("modified", lastModified));
doc.add(new StoredField("modified", lastModified));
doc.add(new TextField("contents", inputStream2String(stream), Field.Store.YES));
//doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
System.out.println("adding " + file);
writer.addDocument(doc);
} else {
System.out.println("updating " + file);
writer.updateDocument(new Term("path", file.toString()), doc);
}
writer.commit();
}

/**
* InputStream转换成String
* @param is    输入流对象
* @return
*/
private static String inputStream2String(InputStream is) {
BufferedReader bufferReader = null;
StringBuilder stringBuilder = new StringBuilder();
String line;
try {
bufferReader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
while ((line = bufferReader.readLine()) != null) {
stringBuilder.append(line + "\r\n");
}
} catch (IOException e) {
e.printStackTrace();
} finally {
if (bufferReader != null) {
try {
bufferReader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return stringBuilder.toString();
}
}
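Because indexDoc() stores the last-modified timestamp three ways (a LongPoint for range queries, a NumericDocValuesField for sorting, and a StoredField so the raw value can be read back), the indexed files can later be filtered by modification date. A small sketch, assuming index/dir1 is one of the directories built by IndexCreator:

import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.FSDirectory;

public class ModifiedRangeQueryDemo {
    public static void main(String[] args) throws Exception {
        // "index/dir1" is a placeholder; point it at an index built by IndexCreator.
        FSDirectory directory = LuceneUtils.openFSDirectory("index/dir1");
        IndexReader reader = LuceneUtils.getIndexReader(directory);
        IndexSearcher searcher = LuceneUtils.getIndexSearcher(reader);

        // Files modified in the last 24 hours, via the LongPoint "modified" field.
        long now = System.currentTimeMillis();
        Query query = LongPoint.newRangeQuery("modified", now - 24L * 60 * 60 * 1000, now);
        List<Document> docs = LuceneUtils.query(searcher, query);
        for (Document doc : docs) {
            // The StoredField copy of "modified" makes the raw value retrievable here.
            System.out.println(doc.get("path") + " -> " + doc.get("modified"));
        }
        LuceneUtils.closeIndexReader(reader);
    }
}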


Next comes the pagination class:

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.search.ScoreDoc;
public class Page<T> {
/**当前第几页(从1开始计算)*/
private int currentPage;
/**每页显示几条*/
private int pageSize;
/**总记录数*/
private int totalRecord;
/**总页数*/
private int totalPage;
/**分页数据集合[用泛型T来限定集合元素类型]*/
private Collection<T> items;
/**当前显示起始索引(从零开始计算)*/
private int startIndex;
/**当前显示结束索引(从零开始计算)*/
private int endIndex;
/**一组最多显示几个页码[比如Google一组最多显示10个页码]*/
private int groupSize;

/**左边偏移量*/
private int leftOffset = 5;
/**右边偏移量*/
private int rightOffset = 4;
/**当前页码范围*/
private String[] pageRange;

/**分页数据*/
private List<Document> docList;
/**上一页最后一个ScoreDoc对象*/
private ScoreDoc afterDoc;

/**上一页最后一个ScoreDoc对象的Document对象ID*/
private int afterDocId;

public void setRangeIndex() {
int groupSize = getGroupSize();
int totalPage = getTotalPage();
if(totalPage < 2) {
startIndex = 0;
endIndex = totalPage - startIndex;
} else {
int currentPage = getCurrentPage();
if(groupSize >= totalPage) {
startIndex = 0;
endIndex = totalPage - startIndex - 1;
} else {
int leftOffset = getLeftOffset();
int middleOffset = getMiddleOffset();
if(-1 == middleOffset) {
startIndex = 0;
endIndex = groupSize - 1;
} else if(currentPage <= leftOffset) {
startIndex = 0;
endIndex = groupSize - 1;
} else {
startIndex = currentPage - leftOffset - 1;
if(currentPage + rightOffset > totalPage) {
endIndex = totalPage - 1;
} else {
endIndex = currentPage + rightOffset - 1;
}
}
}
}
}

public int getCurrentPage() {
if(currentPage <= 0) {
currentPage = 1;
} else {
int totalPage = getTotalPage();
if(totalPage > 0 && currentPage > getTotalPage()) {
currentPage = totalPage;
}
}
return currentPage;
}
public void setCurrentPage(int currentPage) {
this.currentPage = currentPage;
}
public int getPageSize() {
if(pageSize <= 0) {
pageSize = 10;
}
return pageSize;
}
public void setPageSize(int pageSize) {
this.pageSize = pageSize;
}
public int getTotalRecord() {
return totalRecord;
}
public void setTotalRecord(int totalRecord) {
this.totalRecord = totalRecord;
}
public int getTotalPage() {
int totalRecord = getTotalRecord();
if(totalRecord == 0) {
totalPage = 0;
} else {
int pageSize = getPageSize();
totalPage = totalRecord % pageSize == 0 ? totalRecord / pageSize : (totalRecord / pageSize) + 1;
}
return totalPage;
}
public void setTotalPage(int totalPage) {
this.totalPage = totalPage;
}

public int getStartIndex() {
return startIndex;
}
public void setStartIndex(int startIndex) {
this.startIndex = startIndex;
}

public int getEndIndex() {
return endIndex;
}
public void setEndIndex(int endIndex) {
this.endIndex = endIndex;
}
public int getGroupSize() {
if(groupSize <= 0) {
groupSize = 10;
}
return groupSize;
}
public void setGroupSize(int groupSize) {
this.groupSize = groupSize;
}

public int getLeftOffset() {
leftOffset = getGroupSize() / 2;
return leftOffset;

}
public void setLeftOffset(int leftOffset) {
this.leftOffset = leftOffset;
}
public int getRightOffset() {
int groupSize = getGroupSize();
if(groupSize % 2 == 0) {
rightOffset = (groupSize / 2) - 1;
} else {
rightOffset = groupSize / 2;
}
return rightOffset;
}
public void setRightOffset(int rightOffset) {
this.rightOffset = rightOffset;
}

/**中心位置索引[从1开始计算]*/
public int getMiddleOffset() {
int groupSize = getGroupSize();
int totalPage = getTotalPage();
if(groupSize >= totalPage) {
return -1;
}
return getLeftOffset() + 1;
}
public String[] getPageRange() {
setRangeIndex();
int size = endIndex - startIndex + 1;
if(size <= 0) {
return new String[0];
}
if(totalPage == 1) {
return new String[] {"1"};
}
pageRange = new String[size];
for(int i=0; i < size; i++) {
pageRange[i] = (startIndex + i + 1) + "";
}
return pageRange;
}

public void setPageRange(String[] pageRange) {
this.pageRange = pageRange;
}

public Collection<T> getItems() {
return items;
}
public void setItems(Collection<T> items) {
this.items = items;
}

public List<Document> getDocList() {
return docList;
}

public void setDocList(List<Document> docList) {
this.docList = docList;
}

public ScoreDoc getAfterDoc() {
setAfterDocId(afterDocId);
return afterDoc;
}

public void setAfterDoc(ScoreDoc afterDoc) {
this.afterDoc = afterDoc;
}

public int getAfterDocId() {
return afterDocId;
}

public void setAfterDocId(int afterDocId) {
this.afterDocId = afterDocId;
if(null == afterDoc) {
this.afterDoc = new ScoreDoc(afterDocId, 1.0f);
}
}

public Page() {}

public Page(int currentPage, int pageSize) {
this.currentPage = currentPage;
this.pageSize = pageSize;
}

public Page(int currentPage, int pageSize, Collection<T> items) {
this.currentPage = currentPage;
this.pageSize = pageSize;
this.items = items;
}

public Page(int currentPage, int pageSize, Collection<T> items, int groupSize) {
this.currentPage = currentPage;
this.pageSize = pageSize;
this.items = items;
this.groupSize = groupSize;
}

public Page(int currentPage, int pageSize, int groupSize, int afterDocId) {
this.currentPage = currentPage;
this.pageSize = pageSize;
this.groupSize = groupSize;
this.afterDocId = afterDocId;
}

public static void main(String[] args) {
Collection<Integer> items = new ArrayList<Integer>();
int totalRecord = 201;
for(int i=0; i < totalRecord; i++) {
items.add(new Integer(i));
}
Page<Integer> page = new Page<Integer>(1,10,items,10);
page.setTotalRecord(totalRecord);
int totalPage = page.getTotalPage();
for(int i=0; i < totalPage; i++) {
page.setCurrentPage(i+1);
String[] pageRange = page.getPageRange();
System.out.println("当前第" + page.currentPage + "页");
for(int j=0; j < pageRange.length; j++) {
System.out.print(pageRange[j] + "  ");
}
System.out.println("\n");
}
}
}


There is also a paged-query class:

import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

//import com.yida.framework.lucene5.util.Page;

/**
* Lucene搜索
* @author Lanxiaowei
*
*/
public class PageLearing {
public static void main(String[] args) throws ParseException, IOException {
//参数定义
String directoryPath = "D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir3";
String fieldName = "contents";
String queryString = "true";
int currentPage = 1;
int pageSize = 10;

Page<Document> page = pageQuery(fieldName, queryString, directoryPath, currentPage, pageSize);
if(page == null || page.getItems() == null || page.getItems().size() == 0) {
System.out.println("No results found.");
return;
}
for(Document doc : page.getItems()) {
String path = doc.get("path");
String content = doc.get("contents");
System.out.println("path:" + path);
System.out.println("contents:" + content);
}
}
/**
* 创建索引阅读器
* @param directoryPath  索引目录
* @return
* @throws IOException   可能会抛出IO异常
*/
public static IndexReader createIndexReader(String directoryPath) throws IOException {
return DirectoryReader.open(FSDirectory.open(Paths.get(directoryPath, new String[0])));
}

/**
* 创建索引查询器
* @param directoryPath   索引目录
* @return
* @throws IOException
*/
public static IndexSearcher createIndexSearcher(String directoryPath) throws IOException {
return new IndexSearcher(createIndexReader(directoryPath));
}

/**
* 创建索引查询器
* @param reader
* @return
*/
public static IndexSearcher createIndexSearcher(IndexReader reader) {
return new IndexSearcher(reader);
}

/**
* Lucene分页查询
* @param directoryPath
* @param query
* @param page
* @throws IOException
*/
public static void pageQuery(String directoryPath,Query query,Page<Document> page) throws IOException {
IndexSearcher searcher = createIndexSearcher(directoryPath);
int totalRecord = searchTotalRecord(searcher,query);
//设置总记录数
page.setTotalRecord(totalRecord);
TopDocs topDocs = searcher.searchAfter(page.getAfterDoc(),query, page.getPageSize());
List<Document> docList = new ArrayList<Document>();
ScoreDoc[] docs = topDocs.scoreDocs;
int index = 0;
for (ScoreDoc scoreDoc : docs) {
int docID = scoreDoc.doc;
Document document = searcher.doc(docID);
if(index == docs.length - 1) {
page.setAfterDoc(scoreDoc);
page.setAfterDocId(docID);
}
docList.add(document);
index++;
}
page.setItems(docList);
searcher.getIndexReader().close();
}

/**
* 索引分页查询
* @param fieldName
* @param queryString
* @param currentPage
* @param pageSize
* @throws ParseException
* @throws IOException
*/
public static Page<Document> pageQuery(String fieldName,String queryString,String directoryPath,int currentPage,int pageSize) throws ParseException, IOException {
QueryParser parser = new QueryParser(fieldName, new StandardAnalyzer());
Query query = parser.parse(queryString);
Page<Document> page = new Page<Document>(currentPage,pageSize);
pageQuery(directoryPath, query, page);
return page;
}

/**
* @Title: searchTotalRecord
* @Description: 获取符合条件的总记录数
* @param query
* @return
* @throws IOException
*/
public static int searchTotalRecord(IndexSearcher searcher,Query query) throws IOException {
TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
if(topDocs == null || topDocs.scoreDocs == null || topDocs.scoreDocs.length == 0) {
return 0;
}
ScoreDoc[] docs = topDocs.scoreDocs;
return docs.length;
}
}
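One design note on the searchAfter-based paging above: to fetch page N you need the last ScoreDoc of page N-1, which the Page object carries as afterDoc/afterDocId. A small sketch (the field name, query text and index path are placeholders) that walks two consecutive pages:

import org.apache.lucene.document.Document;

public class PageWalkDemo {
    public static void main(String[] args) throws Exception {
        // Placeholders: adjust the field, query text and index path to your data.
        String field = "contents";
        String queryString = "lucene";
        String indexPath = "index/dir1";

        // Page 1: the Page object supplies a default searchAfter() anchor (docId 0, score 1.0f).
        Page<Document> page1 = PageLearing.pageQuery(field, queryString, indexPath, 1, 10);

        // Page 2: carry over the docId of the last hit of page 1 as the "after" anchor.
        Page<Document> page2 = new Page<Document>(2, 10, 10, page1.getAfterDocId());
        // pageQuery(directoryPath, query, page) uses page2.getAfterDoc() as the searchAfter anchor.
        PageLearing.pageQuery(indexPath,
                LuceneUtils.createQueryParser(field, LuceneUtils.analyzer).parse(queryString),
                page2);

        System.out.println("page1 hits: " + page1.getItems().size());
        System.out.println("page2 hits: " + page2.getItems().size());
    }
}

Note that Page builds the anchor ScoreDoc with a fixed score of 1.0f, so with relevance-ranked queries whose scores vary, searchAfter() may not resume exactly where the previous page ended.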


Here is the parameter class used for highlighted queries:

/**
* @ClassName: HighlighterParam
* @Description: 高亮器参数对象
* @author Lanxiaowei
*/
public class HighlighterParam {
/**是否需要设置高亮*/
private boolean highlight;
/**需要设置高亮的属性名*/
private String fieldName;
/**高亮前缀*/
private String prefix;
/**高亮后缀*/
private String stuffix;
/**显示摘要最大长度*/
private int fragmenterLength;
public boolean isHighlight() {
return highlight;
}
public void setHighlight(boolean highlight) {
this.highlight = highlight;
}
public String getFieldName() {
return fieldName;
}
public void setFieldName(String fieldName) {
this.fieldName = fieldName;
}
public String getPrefix() {
return prefix;
}
public void setPrefix(String prefix) {
this.prefix = prefix;
}
public String getStuffix() {
return stuffix;
}
public void setStuffix(String stuffix) {
this.stuffix = stuffix;
}
public int getFragmenterLength() {
return fragmenterLength;
}
public void setFragmenterLength(int fragmenterLength) {
this.fragmenterLength = fragmenterLength;
}
public HighlighterParam(boolean highlight, String fieldName, String prefix, String stuffix, int fragmenterLength) {
this.highlight = highlight;
this.fieldName = fieldName;
this.prefix = prefix;
this.stuffix = stuffix;
this.fragmenterLength = fragmenterLength;
}

public HighlighterParam(boolean highlight, String fieldName, int fragmenterLength) {
this.highlight = highlight;
this.fieldName = fieldName;
this.fragmenterLength = fragmenterLength;
}

public HighlighterParam(boolean highlight, String fieldName, String prefix, String stuffix) {
this.highlight = highlight;
this.fieldName = fieldName;
this.prefix = prefix;
this.stuffix = stuffix;
}
public HighlighterParam() {
}
}
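A sketch of how HighlighterParam plugs into the highlighted LuceneUtils.pageQuery() overload (the path index/dir1, the field contents and the query text are placeholder values): matched terms in the contents field get wrapped in <em> tags and the page of documents comes back with the highlighted text.

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.FSDirectory;

public class HighlightPageQueryDemo {
    public static void main(String[] args) throws Exception {
        // "index/dir1" is a placeholder path; field name and query text are placeholders too.
        FSDirectory directory = LuceneUtils.openFSDirectory("index/dir1");
        IndexReader reader = LuceneUtils.getIndexReader(directory);
        IndexSearcher searcher = LuceneUtils.getIndexSearcher(reader);

        Query query = LuceneUtils.createQueryParser("contents", LuceneUtils.analyzer).parse("lucene");
        Page<Document> page = new Page<Document>(1, 10);

        // Highlight matches in the "contents" field with <em>...</em>, 100-character fragments.
        HighlighterParam param = new HighlighterParam(true, "contents", "<em>", "</em>", 100);
        LuceneUtils.pageQuery(searcher, directory, query, page,
                param, new IndexWriterConfig(LuceneUtils.analyzer));

        for (Document doc : page.getItems()) {
            System.out.println(doc.get("contents"));
        }
    }
}

Keep in mind that, as written, this overload writes the highlighted text back into the index via updateIndex(), which is a side effect to be aware of when reusing the class.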


Finally, the tests: build the indexes first, then run the various search tests.

The indexing test:

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
* 多线程创建索引
* @author Lanxiaowei
*
*/
public class MultiThreadIndexTest {
/**
* 创建了5个线程同时创建索引
* @param args
* @throws InterruptedException
*/
public static void main(String[] args) throws InterruptedException {
int threadCount = 5;
ExecutorService pool = Executors.newFixedThreadPool(threadCount);
CountDownLatch countDownLatch1 = new CountDownLatch(1);
CountDownLatch countDownLatch2 = new CountDownLatch(threadCount);
for(int i = 0; i < threadCount; i++) {
Runnable runnable = new IndexCreator("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\data\\doc" + (i+1),
"D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir" + (i+1),threadCount,
countDownLatch1,countDownLatch2);
//子线程交给线程池管理
pool.execute(runnable);
}

countDownLatch1.countDown();
System.out.println("开始创建索引");
//等待所有线程都完成
countDownLatch2.await();
//线程全部完成工作
System.out.println("所有线程都创建索引完毕");
//释放线程池资源
pool.shutdown();
}
}


The search test:

import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
//MultiSearcher;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
* 多线程多索引目录查询测试
* @author Lanxiaowei
*
*/
public class MultiThreadSearchTest {
public static void main(String[] args) throws InterruptedException, ExecutionException, IOException {
//每个线程都从八个索引目录中查询,所以最终5个线程的查询结果都一样
//multiThreadAndMultiReaderSearch();
//布尔查询
//BoolmultiReaderSearch();
//多索引目录查询(把多个索引目录当作一个索引目录)
multiReaderSearch();
//  DiffmultiReaderSearch();
}
//多索引目录查询
public static void multiReaderSearch()  throws InterruptedException, ExecutionException, IOException {
Directory directory1 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir1");
Directory directory2 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir2");
Directory directory3 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir3");
Directory directory4 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir4");
Directory directory5 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir5");
Directory directory6 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir6");
Directory directory7 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir7");
Directory directory8 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir8");
IndexReader reader1 = DirectoryReader.open(directory1);
IndexReader reader2 = DirectoryReader.open(directory2);
IndexReader reader3 = DirectoryReader.open(directory3);
IndexReader reader4 = DirectoryReader.open(directory4);
IndexReader reader5 = DirectoryReader.open(directory5);
IndexReader reader6 = DirectoryReader.open(directory6);
IndexReader reader7 = DirectoryReader.open(directory7);
IndexReader reader8 = DirectoryReader.open(directory8);
MultiReader multiReader = new MultiReader(reader1,reader2,reader3,reader4,reader5,reader6,reader7,reader8);

IndexSearcher indexSearcher = LuceneUtils.getIndexSearcher(multiReader);
String field1 = "name";
String keyword = "草原";
//  String field1 = "tvName";
//   String keyword = "cctv2";
Query query = new TermQuery(new Term(field1,keyword ));

List<Document> list = LuceneUtils.query(indexSearcher, query);

if(null == list || list.size() <= 0) {
System.out.println("No results.");
return;
}

for(Document doc : list) {
// System.out.println("实际搜索到的记录数 => " + doc.getField(field1));
// System.out.println("实际搜索到的记录数 => " + doc.getField("tvName"));
// System.out.println("实际搜索到的记录数 => " + doc.getValues("tvName"));//getField("otherNames"));
// System.out.println("实际搜索到的记录数 => " + doc.getFields());
//  System.out.println("实际搜索到的记录数 => " + doc.getField("otherNames"));
System.out.println("实际搜索到的记录数 => " + doc.getField("otherNames"));
//System.out.println("实际搜索到的记录数 => " + doc.toString());
//String otherName = doc.get("normalTvName");
//  String name = doc.get("tvName");
String content = doc.get("name");
String content1 = doc.get("otherNames");
// System.out.println("别名为:" + otherName);
//  System.out.println("名字为:" + name);
System.out.println("contents:" + content);
System.out.println("contents1:" + content1);
}
}

/**
* 多索引目录且多线程查询,异步收集查询结果
* @throws InterruptedException
* @throws ExecutionException
* @throws IOException
*/
public static void multiThreadAndMultiReaderSearch()  throws InterruptedException, ExecutionException, IOException {
int count = 9;
ExecutorService pool = Executors.newFixedThreadPool(count);
Directory directory1 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir1");
Directory directory2 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir2");
Directory directory3 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir3");
Directory directory4 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir4");
Directory directory5 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir5");
Directory directory6 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir6");
Directory directory7 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir7");
Directory directory8 = LuceneUtils.openFSDirectory("D:\\workspace\\lucene6.4.1\\learing2017.8\\0901Exector\\Index\\dir8");
IndexReader reader1 = DirectoryReader.open(directory1);
IndexReader reader2 = DirectoryReader.open(directory2);
IndexReader reader3 = DirectoryReader.open(directory3);
IndexReader reader4 = DirectoryReader.open(directory4);
IndexReader reader5 = DirectoryReader.open(directory5);
IndexReader reader6 = DirectoryReader.open(directory6);
IndexReader reader7 = DirectoryReader.open(directory7);
IndexReader reader8 = DirectoryReader.open(directory8);
MultiReader multiReader = new MultiReader(reader1,reader2,reader3,reader4,reader5,reader6,reader7,reader8);
final IndexSearcher indexSearcher = LuceneUtils.getIndexSearcher(multiReader, pool);
// final Query query = new TermQuery(new Term("tvName","cctv1"));
final Query query = new TermQuery(new Term("name","草原"));
List<Future<List<Document>>> futures = new ArrayList<Future<List<Document>>>(count);
for (int i = 0; i < count; i++) {
futures.add(pool.submit(new Callable<List<Document>>() {
public List<Document> call() throws Exception {
return LuceneUtils.query(indexSearcher, query);
}
}));
}

int t = 0;
//通过Future异步获取线程执行后返回的结果
for (Future<List<Document>> future : futures) {
List<Document> list = future.get();
if(null == list || list.size() <= 0) {
t++;
continue;
}
for(Document doc : list) {
//  String path = doc.get("path");
//String content = doc.get("contents");
//  System.out.println("path:" + path);
//System.out.println("contents:" + content);
// System.out.println("实际搜索到的记录数 => " + doc.getField(field1));
// System.out.println("实际搜索到的记录数 => " + doc.getField("name"));
//   String otherName = doc.get("normalTvName");
//  System.out.println("实际搜索到的记录数 => " + doc.getField("normalTvName"));
//String name = doc.get("tvName");
// System.out.println("别名为:" + otherName);
// System.out.println("名字为:" + name);
String otherName = doc.get("otherNames");
System.out.println("实际搜索到的记录数 => " + doc.getField("otherNames"));
String name = doc.get("name");
System.out.println("别名为:" + otherName);
System.out.println("名字为:" + name);
}
System.out.println("");
}
//释放线程池资源
pool.shutdown();

if(t == count) {
System.out.println("No results.");
}
}
}


The document paths and index directory paths used above need to be adjusted to match your own environment.

References:

Lucene 5 index creation: http://blog.csdn.net/asdfsadfasdfsa/article/details/77649108

Multi-threaded, multi-directory indexing: http://blog.csdn.net/asdfsadfasdfsa/article/details/77649363

Multi-threaded concurrency issues: http://blog.csdn.net/xzm_rainbow/article/details/18965507

Note: http://blog.csdn.net/u014783753/article/details/43266325