Lucene 入门和简单封装
2016-03-06 09:17
555 查看
package com.whf.demo;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Process-wide holder for the shared Lucene {@link IndexWriter},
 * {@link IndexReader} and {@link IndexSearcher} instances.
 *
 * <p>Glossary kept from the original notes: a Document contains several
 * Fields; a Field is one attribute; an Analyzer performs tokenization; an
 * IndexWriter creates the index; a Directory is where the index is stored
 * (FSDirectory or RAMDirectory); a Term is a (field, keyword) pair to look up.
 *
 * @author whf
 */
public class LuceneManager {
    private volatile static LuceneManager singleton = null;
    private volatile static IndexWriter writer = null;
    private volatile static IndexReader reader = null;
    private volatile static IndexSearcher searcher = null;

    /** Guards creation of the shared {@link IndexWriter}. */
    private final Lock writerLock = new ReentrantLock();
    /** Monitor used while creating the singleton. */
    private final static Object obj = new Object();
    private static Version version = Version.LUCENE_CURRENT;

    /** Private constructor: use {@link #getInstance()}. */
    private LuceneManager() {
    }

    /**
     * Returns the single {@code LuceneManager} instance, creating it on first
     * use (creation is synchronized on {@code obj}).
     *
     * @return the shared manager
     */
    public static LuceneManager getInstance() {
        if (null == singleton) {
            synchronized (obj) {
                if (null == singleton) {
                    singleton = new LuceneManager();
                }
            }
        }
        return singleton;
    }

    /**
     * Returns the shared {@link IndexWriter}, creating it on first use while
     * holding {@code writerLock}.
     *
     * @param dir    index directory, must not be {@code null}
     * @param config writer configuration, must not be {@code null}
     * @return the shared writer, or {@code null} when it could not be created
     *         (the directory is locked or an I/O error occurred; the error is
     *         printed to stderr)
     * @throws IllegalArgumentException if {@code dir} or {@code config} is null
     */
    public IndexWriter getIndexWriter(Directory dir, IndexWriterConfig config) {
        if (dir == null) {
            throw new IllegalArgumentException("Directory can not be null.");
        }
        if (config == null) {
            throw new IllegalArgumentException(
                    "IndexWriterConfig can not be null.");
        }
        // Acquire the lock BEFORE entering try: if lock() itself failed, the
        // finally block must not call unlock() on a lock we do not hold.
        writerLock.lock();
        try {
            if (writer == null) {
                if (IndexWriter.isLocked(dir)) {
                    throw new LockObtainFailedException(
                            "Directory of index had been locked.");
                }
                writer = new IndexWriter(dir, config);
            }
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            writerLock.unlock();
        }
        return writer;
    }

    /**
     * Returns the shared {@link IndexReader}.
     *
     * <p>A reader is opened when none is cached or when the cached one has
     * already been closed (reference count dropped to zero). With
     * {@code enableNRTReader} the cached reader is refreshed through
     * {@link DirectoryReader#openIfChanged(DirectoryReader)}; that method
     * returns {@code null} when the index is unchanged, in which case the
     * cached reader is kept. A superseded reader is not closed here because
     * callers may still be searching it.
     *
     * @param dir             index directory, must not be {@code null}
     * @param enableNRTReader whether to refresh the cached reader so that
     *                        recent additions/deletions become visible
     * @return the shared reader; the previously cached value (possibly
     *         {@code null}) when opening fails
     * @throws IllegalArgumentException if {@code dir} is null
     */
    public IndexReader getIndexReader(Directory dir, boolean enableNRTReader) {
        if (dir == null) {
            throw new IllegalArgumentException("Directory can not be null.");
        }
        try {
            IndexReader current = reader;
            if (current == null || current.getRefCount() <= 0) {
                current = DirectoryReader.open(dir);
            } else if (enableNRTReader && current instanceof DirectoryReader) {
                DirectoryReader refreshed = DirectoryReader
                        .openIfChanged((DirectoryReader) current);
                if (refreshed != null) {
                    current = refreshed;
                }
            }
            reader = current;
        } catch (IOException e) {
            e.printStackTrace();
        }
        return reader;
    }

    /**
     * Returns the shared {@link IndexReader} without refreshing it.
     *
     * @param dir index directory, must not be {@code null}
     * @return the shared reader
     */
    public IndexReader getIndexReader(Directory dir) {
        return getIndexReader(dir, false);
    }

    /**
     * Returns an {@link IndexSearcher} for the given reader. The searcher is
     * cached and reused as long as the same reader instance is passed; a new
     * searcher is built when the reader changes.
     *
     * @param reader   reader to search, must not be {@code null}
     * @param executor executor for multi-threaded search, or {@code null} for
     *                 single-threaded search; only consulted when a new
     *                 searcher has to be created
     * @return a searcher bound to {@code reader}
     * @throws IllegalArgumentException if {@code reader} is null
     */
    public IndexSearcher getIndexSearcher(IndexReader reader,
            ExecutorService executor) {
        if (reader == null) {
            throw new IllegalArgumentException(
                    "The indexReader can not be null.");
        }
        IndexSearcher current = searcher;
        if (current == null || current.getIndexReader() != reader) {
            current = (executor == null)
                    ? new IndexSearcher(reader)
                    : new IndexSearcher(reader, executor);
            searcher = current;
        }
        return current;
    }

    /**
     * Returns a single-threaded {@link IndexSearcher} for the given reader.
     *
     * @param reader reader to search, must not be {@code null}
     * @return a searcher bound to {@code reader}
     */
    public IndexSearcher getIndexSearcher(IndexReader reader) {
        return getIndexSearcher(reader, null);
    }

    /**
     * Creates a {@link QueryParser} for a single default field.
     *
     * <p>Typical use: {@code parser.parse("keyword")}; call
     * {@code parser.setDefaultOperator(...)} with AND to require every keyword
     * or OR to accept any of them.
     *
     * @param field    default field name
     * @param analyzer analyzer used to tokenize the query text
     * @return a new parser
     */
    public static QueryParser createQueryParser(String field, Analyzer analyzer) {
        return new QueryParser(field, analyzer);
    }

    /**
     * Creates a {@link MultiFieldQueryParser} that searches several fields.
     *
     * @param fields   field names to search
     * @param analyzer analyzer used to tokenize the query text
     * @return a new parser
     */
    public static QueryParser createMultiFieldQueryParser(String[] fields,
            Analyzer analyzer) {
        return new MultiFieldQueryParser(fields, analyzer);
    }

    /**
     * Closes the given writer and, when it is the cached shared writer, drops
     * it from the cache so the next {@code getIndexWriter} call creates a
     * fresh one. I/O errors are printed to stderr.
     *
     * @param writer writer to close; {@code null} is ignored
     */
    public static void closeIndexWriter(IndexWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // The parameter shadows the static field; reset the field itself.
            if (writer == LuceneManager.writer) {
                LuceneManager.writer = null;
            }
        }
    }

    /**
     * Closes the given reader and, when it is the cached shared reader (or the
     * one behind the cached searcher), drops the stale cache entries. I/O
     * errors are printed to stderr.
     *
     * @param reader reader to close; {@code null} is ignored
     */
    public static void closeIndexReader(IndexReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (reader == LuceneManager.reader) {
                LuceneManager.reader = null;
            }
            IndexSearcher cached = LuceneManager.searcher;
            if (cached != null && cached.getIndexReader() == reader) {
                LuceneManager.searcher = null;
            }
        }
    }

    /**
     * Closes both the reader and the writer.
     *
     * @param reader reader to close; {@code null} is ignored
     * @param writer writer to close; {@code null} is ignored
     */
    public static void closeAll(IndexReader reader, IndexWriter writer) {
        closeIndexReader(reader);
        closeIndexWriter(writer);
    }
}
package com.whf.demo;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutorService;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.whf.pojo.HighlighterParam;
import com.whf.pojo.Page;

/**
 * Static convenience wrappers around common Lucene operations: opening and
 * closing directories, obtaining the shared writer/reader/searcher from
 * {@link LuceneManager}, adding/updating/deleting documents, querying,
 * paging and highlighting.
 *
 * <p>Most methods report {@link IOException}s by printing the stack trace
 * instead of rethrowing.
 */
public class LuceneUtil {
    private static final LuceneManager manager = LuceneManager.getInstance();
    private static Analyzer analyzer = new IKAnalyzer(true);
    private static Version version = Version.LUCENE_CURRENT;
    private static FSDirectory fsDirectory = null;
    private static RAMDirectory ramDirectory = null;

    /**
     * Opens (and caches) the file-system index directory, creating the folder
     * when it does not exist.
     *
     * <p>NOTE(review): the first successfully opened directory is cached and
     * returned for every later call, whatever {@code luceneDir} is passed.
     *
     * @param luceneDir path of the index folder
     * @return the cached directory, or {@code null} when opening failed
     */
    public static FSDirectory openFSDirectory(String luceneDir) {
        if (fsDirectory == null) {
            try {
                File dir = new File(luceneDir);
                if (!dir.exists()) {
                    dir.mkdirs();
                }
                fsDirectory = FSDirectory.open(Paths.get(luceneDir));
                // Original author's note: isLocked() internally tries to
                // obtain the lock and releases it again if it succeeds,
                // otherwise it reports the directory state; this is why
                // unlock() was removed from IndexWriter. The result is
                // intentionally ignored here.
                IndexWriter.isLocked(fsDirectory);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return fsDirectory;
    }

    /**
     * Opens the in-memory directory. The directory is created on first use
     * and the same instance is returned afterwards.
     *
     * @return the shared in-memory directory
     */
    public static RAMDirectory openRAMDirectory() {
        if (ramDirectory == null) {
            ramDirectory = new RAMDirectory();
        }
        return ramDirectory;
    }

    /**
     * Closes a directory. When it is one of the directories cached by this
     * class, the cache entry is cleared so that it can be reopened later.
     *
     * @param directory directory to close; {@code null} is ignored
     */
    public static void closeDirectory(Directory directory) {
        if (directory == null) {
            return;
        }
        try {
            directory.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (directory == fsDirectory) {
            fsDirectory = null;
        }
        if (directory == ramDirectory) {
            ramDirectory = null;
        }
    }

    /**
     * Closes an {@link IndexReader} via {@link LuceneManager}.
     *
     * @param reader reader to close; {@code null} is ignored
     */
    public static void closeIndexReader(IndexReader reader) {
        LuceneManager.closeIndexReader(reader);
    }

    /**
     * Closes an {@link IndexWriter} via {@link LuceneManager}.
     *
     * @param writer writer to close; {@code null} is ignored
     */
    public static void closeIndexWriter(IndexWriter writer) {
        LuceneManager.closeIndexWriter(writer);
    }

    /**
     * Closes both the reader and the writer.
     *
     * @param reader reader to close; {@code null} is ignored
     * @param writer writer to close; {@code null} is ignored
     */
    public static void closeAll(IndexReader reader, IndexWriter writer) {
        closeIndexReader(reader);
        closeIndexWriter(writer);
    }

    /**
     * Returns the shared {@link IndexWriter} for the given directory.
     *
     * @param dir    index directory
     * @param config writer configuration
     * @return the shared writer (may be {@code null} if it could not be created)
     */
    public static IndexWriter getIndexWrtier(Directory dir,
            IndexWriterConfig config) {
        return manager.getIndexWriter(dir, config);
    }

    /**
     * Returns the shared {@link IndexWriter} for the index folder at the
     * given path.
     *
     * @param directoryPath path of the index folder
     * @param config        writer configuration
     * @return the shared writer (may be {@code null} if it could not be created)
     */
    public static IndexWriter getIndexWrtier(String directoryPath,
            IndexWriterConfig config) {
        FSDirectory directory = openFSDirectory(directoryPath);
        return manager.getIndexWriter(directory, config);
    }

    /**
     * Returns the shared {@link IndexReader}.
     *
     * @param dir             index directory
     * @param enableNRTReader whether to refresh the reader to see recent changes
     * @return the shared reader
     */
    public static IndexReader getIndexReader(Directory dir,
            boolean enableNRTReader) {
        return manager.getIndexReader(dir, enableNRTReader);
    }

    /**
     * Returns the shared {@link IndexReader} without refreshing it.
     *
     * @param dir index directory
     * @return the shared reader
     */
    public static IndexReader getIndexReader(Directory dir) {
        return manager.getIndexReader(dir);
    }

    /**
     * Returns an {@link IndexSearcher} for the given reader.
     *
     * @param reader   reader to search
     * @param executor executor for multi-threaded search, or {@code null}
     * @return the searcher
     */
    public static IndexSearcher getIndexSearcher(IndexReader reader,
            ExecutorService executor) {
        return manager.getIndexSearcher(reader, executor);
    }

    /**
     * Returns a single-threaded {@link IndexSearcher} for the given reader.
     *
     * @param reader reader to search
     * @return the searcher
     */
    public static IndexSearcher getIndexSearcher(IndexReader reader) {
        return manager.getIndexSearcher(reader);
    }

    /**
     * Deletes every document containing the term {@code (field, keyword)}.
     * The caller is responsible for closing the writer.
     *
     * @param writer  writer to use
     * @param field   field name
     * @param keyword term text
     */
    public static void deleteIndex(IndexWriter writer, String field,
            String keyword) {
        try {
            writer.deleteDocuments(new Term[] { new Term(field, keyword) });
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes every document containing any of the given terms. The caller is
     * responsible for closing the writer.
     *
     * @param writer writer to use
     * @param terms  terms identifying the documents to delete
     */
    public static void deleteIndexs(IndexWriter writer, Term[] terms) {
        try {
            writer.deleteDocuments(terms);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes every document containing the given term. The caller is
     * responsible for closing the writer.
     *
     * @param writer writer to use
     * @param term   term identifying the documents to delete
     */
    public static void deleteIndex(IndexWriter writer, Term term) {
        try {
            writer.deleteDocuments(new Term[] { term });
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes every document matching any of the given queries. The caller is
     * responsible for closing the writer.
     *
     * @param writer writer to use
     * @param querys queries identifying the documents to delete
     */
    public static void deleteIndexs(IndexWriter writer, Query[] querys) {
        try {
            writer.deleteDocuments(querys);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes every document matching the given query. The caller is
     * responsible for closing the writer.
     *
     * @param writer writer to use
     * @param query  query identifying the documents to delete
     */
    public static void deleteIndex(IndexWriter writer, Query query) {
        try {
            writer.deleteDocuments(query);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes all documents from the index.
     *
     * @param writer writer to use
     */
    public static void deleteAllIndex(IndexWriter writer) {
        try {
            writer.deleteAll();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Replaces the documents containing {@code term} with {@code document}.
     *
     * <p>Example: with {@code new Term("id", "1234567")} the documents whose
     * id is 1234567 are removed and the new document is added (if several
     * matched, only one remains; if none matched, the document is simply
     * added). Unlike a database column update, Lucene can only replace a whole
     * document.
     *
     * @param writer   writer to use
     * @param term     term identifying the documents to replace
     * @param document replacement document
     */
    public static void updateIndex(IndexWriter writer, Term term,
            Document document) {
        try {
            writer.updateDocument(term, document);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Adds a single document to the index.
     *
     * @param writer   writer to use
     * @param document document to add
     */
    public static void addIndex(IndexWriter writer, Document document) {
        // A null delete-term means "nothing to replace", i.e. a plain add.
        updateIndex(writer, null, document);
    }

    /**
     * Adds a batch of documents to the index.
     *
     * @param writer    writer to use
     * @param documents documents to add
     */
    public static void addIndex(IndexWriter writer, List<Document> documents) {
        try {
            writer.addDocuments(documents);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Merges the content of {@code fsDirectory} into the index written by
     * {@code writer} (originally described as flushing in-memory data to
     * disk).
     *
     * @param writer      writer of the destination index
     * @param fsDirectory directory whose index is added to the writer
     * @param analyzer    unused
     */
    public static void OptimizeRAMToFSDirectory(IndexWriter writer,
            Directory fsDirectory, Analyzer analyzer) {
        try {
            writer.addIndexes(new Directory[] { fsDirectory });
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs a query and loads the matching documents.
     *
     * @param searcher searcher to use
     * @param query    query to run
     * @param rows     maximum number of hits, or {@code null} for no limit
     * @param sort     sort order, or {@code null} for relevance order
     * @param filter   filter to apply, or {@code null}
     * @return the matching documents; an empty list when nothing matched or
     *         the search failed (documents loaded before a failure are kept);
     *         never {@code null}
     */
    public static List<Document> query(IndexSearcher searcher, Query query,
            Integer rows, Sort sort, Filter filter) {
        int limit = (rows == null) ? Integer.MAX_VALUE : rows;
        List<Document> docList = new ArrayList<>();
        try {
            TopDocs topDocs;
            if (sort != null) {
                topDocs = (filter != null)
                        ? searcher.search(query, filter, limit, sort)
                        : searcher.search(query, limit, sort);
            } else {
                topDocs = (filter != null)
                        ? searcher.search(query, filter, limit)
                        : searcher.search(query, limit);
            }
            ScoreDoc[] scores = topDocs.scoreDocs;
            if (scores.length == 0) {
                return Collections.emptyList();
            }
            for (ScoreDoc score : scores) {
                docList.add(searcher.doc(score.doc));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return docList;
    }

    /**
     * Returns the number of live documents in the index. The caller is
     * responsible for closing the reader.
     *
     * @param reader reader to inspect
     * @return {@code reader.numDocs()}
     */
    public static int getIndexTotalCount(IndexReader reader) {
        return reader.numDocs();
    }

    /**
     * Returns {@code reader.maxDoc()}. The caller is responsible for closing
     * the reader.
     *
     * @param reader reader to inspect
     * @return {@code reader.maxDoc()}
     */
    public static int getMaxDocId(IndexReader reader) {
        return reader.maxDoc();
    }

    /**
     * Returns the number of deleted documents still present in the index,
     * computed as {@code maxDoc - numDocs}. The caller is responsible for
     * closing the reader.
     *
     * @param reader reader to inspect
     * @return number of deleted documents
     */
    public static int getDeletedDocNum(IndexReader reader) {
        return getMaxDocId(reader) - getIndexTotalCount(reader);
    }

    /**
     * Loads a document by its internal document id.
     *
     * @param reader       reader to use
     * @param docID        internal document id
     * @param fieldsToLoad names of the fields to load
     * @return the document, or {@code null} when loading failed
     */
    public static Document findDocumentByDocId(IndexReader reader, int docID,
            Set<String> fieldsToLoad) {
        try {
            return reader.document(docID, fieldsToLoad);
        } catch (IOException e) {
            return null;
        }
    }

    /**
     * Loads a document by its internal document id, passing {@code null} as
     * the field selection.
     *
     * @param reader reader to use
     * @param docID  internal document id
     * @return the document, or {@code null} when loading failed
     */
    public static Document findDocumentByDocId(IndexReader reader, int docID) {
        return findDocumentByDocId(reader, docID, null);
    }

    /**
     * Creates a highlighter for the given query.
     *
     * @param query            query whose terms are highlighted
     * @param prefix           text inserted before a hit; blank or
     *                         {@code null} selects a red {@code <font>} tag
     * @param stuffix          text inserted after a hit; blank or
     *                         {@code null} selects {@code </font>}
     * @param fragmenterLength fragment size; values {@code <= 0} select 50
     * @return the configured highlighter
     */
    public static Highlighter createHighlighter(Query query, String prefix,
            String stuffix, int fragmenterLength) {
        String preTag = (prefix == null || prefix.trim().length() == 0)
                ? "<font color=\"red\">" : prefix;
        String postTag = (stuffix == null || stuffix.trim().length() == 0)
                ? "</font>" : stuffix;
        Formatter formatter = new SimpleHTMLFormatter(preTag, postTag);
        Scorer fragmentScorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
        Fragmenter fragmenter = new SimpleFragmenter(
                fragmenterLength <= 0 ? 50 : fragmenterLength);
        highlighter.setTextFragmenter(fragmenter);
        return highlighter;
    }

    /**
     * Produces the highlighted text of one field of a document.
     *
     * @param document    document to read
     * @param highlighter highlighter to apply
     * @param analyzer    analyzer used to tokenize the field text
     * @param field       name of the field to highlight
     * @return the best highlighted fragment; the original field value when no
     *         fragment was produced or highlighting failed; {@code null} when
     *         the document has no such field
     * @throws IOException if the analyzer fails to read the text
     */
    public static String highlight(Document document, Highlighter highlighter,
            Analyzer analyzer, String field) throws IOException {
        for (IndexableField candidate : document.getFields()) {
            if (!candidate.name().equals(field)) {
                continue;
            }
            String original = candidate.stringValue();
            String fragment;
            try {
                fragment = highlighter.getBestFragment(analyzer, field,
                        original);
            } catch (InvalidTokenOffsetsException e) {
                fragment = original;
            }
            boolean blank = fragment == null || fragment.trim().length() == 0;
            return blank ? candidate.stringValue() : fragment;
        }
        return null;
    }

    /**
     * Counts the documents matching a query.
     *
     * @param search searcher to use
     * @param query  query to run
     * @return number of matching documents; {@code 0} when nothing matched or
     *         the search failed
     */
    public static int searchTotalRecord(IndexSearcher search, Query query) {
        try {
            TopDocs topDocs = search.search(query, Integer.MAX_VALUE);
            if (topDocs == null || topDocs.scoreDocs == null) {
                return 0;
            }
            return topDocs.scoreDocs.length;
        } catch (IOException e) {
            e.printStackTrace();
            return 0;
        }
    }

    /**
     * Runs a paged query and stores the result in {@code page}: the total
     * record count, the documents of the page and the last hit (used as the
     * "search after" marker for the next page). The searcher's reader is
     * closed before returning.
     *
     * @param searcher  searcher to use
     * @param directory unused
     * @param query     query to run
     * @param page      page descriptor that receives the result
     */
    public static void pageQuery(IndexSearcher searcher, Directory directory,
            Query query, Page<Document> page) {
        try {
            page.setTotalRecord(searchTotalRecord(searcher, query));
            List<Document> docList = new ArrayList<>();
            ScoreDoc[] docs = new ScoreDoc[0];
            try {
                TopDocs topDocs = searcher.searchAfter(page.getAfterDoc(),
                        query, page.getPageSize());
                docs = topDocs.scoreDocs;
            } catch (IOException e) {
                // A failed search yields an empty page instead of an NPE.
                e.printStackTrace();
            }
            for (ScoreDoc scoreDoc : docs) {
                Document document = null;
                try {
                    document = searcher.doc(scoreDoc.doc);
                } catch (IOException e) {
                    e.printStackTrace();
                }
                docList.add(document);
            }
            if (docs.length > 0) {
                ScoreDoc last = docs[docs.length - 1];
                page.setAfterDoc(last);
                page.setAfterDocId(last.doc);
            }
            page.setItems(docList);
        } finally {
            closeIndexReader(searcher.getIndexReader());
        }
    }

    /**
     * Runs a paged query, optionally highlighting one field.
     *
     * <p>Without highlighting this behaves like the four-argument overload.
     * With highlighting, every hit whose highlighted text differs from the
     * stored text is rewritten in the index with the highlighted text, and
     * the rewritten document is returned in the page. The searcher's reader
     * and the writer are closed before returning, also when an error occurs.
     *
     * @param searcher         searcher to use
     * @param directory        index directory used to obtain the writer
     * @param query            query to run
     * @param page             page descriptor that receives the result
     * @param highlighterParam highlighting options, or {@code null} for none
     * @param writerConfig     configuration for the writer used for updates
     * @throws IOException if searching or loading a document fails
     */
    public static void pageQuery(IndexSearcher searcher, Directory directory,
            Query query, Page<Document> page,
            HighlighterParam highlighterParam, IndexWriterConfig writerConfig)
            throws IOException {
        // No highlighting requested: the plain overload does everything,
        // including closing the reader.
        if (null == highlighterParam || !highlighterParam.isHighlight()) {
            pageQuery(searcher, directory, query, page);
            return;
        }
        IndexWriter writer = null;
        try {
            page.setTotalRecord(searchTotalRecord(searcher, query));
            TopDocs topDocs = searcher.searchAfter(page.getAfterDoc(), query,
                    page.getPageSize());
            ScoreDoc[] docs = topDocs.scoreDocs;
            List<Document> docList = new ArrayList<>(docs.length);
            writer = getIndexWrtier(directory, writerConfig);
            String fieldName = highlighterParam.getFieldName();
            // The highlighter only depends on the query and the options, so
            // one instance serves every hit.
            Highlighter highlighter = createHighlighter(query,
                    highlighterParam.getPrefix(),
                    highlighterParam.getStuffix(),
                    highlighterParam.getFragmenterLength());
            for (ScoreDoc scoreDoc : docs) {
                Document document = searcher.doc(scoreDoc.doc);
                String content = document.get(fieldName);
                if (null != content && content.trim().length() > 0) {
                    String text = highlight(document, highlighter, analyzer,
                            fieldName);
                    // A different text means highlighting succeeded.
                    if (text != null && !text.equals(content)) {
                        Document highlighted = copyWithFieldText(document,
                                fieldName, text);
                        updateIndex(writer, new Term(fieldName, content),
                                highlighted);
                        document = highlighted;
                    }
                }
                docList.add(document);
            }
            if (docs.length > 0) {
                ScoreDoc last = docs[docs.length - 1];
                page.setAfterDoc(last);
                page.setAfterDocId(last.doc);
            }
            page.setItems(docList);
        } finally {
            closeIndexReader(searcher.getIndexReader());
            closeIndexWriter(writer);
        }
    }

    /**
     * Copies a document, replacing every field named {@code fieldName} with a
     * stored {@link TextField} holding {@code text}.
     */
    private static Document copyWithFieldText(Document source,
            String fieldName, String text) {
        Document copy = new Document();
        List<IndexableField> fields = source.getFields();
        if (fields != null) {
            for (IndexableField field : fields) {
                if (field.name().equals(fieldName)) {
                    copy.add(new TextField(field.name(), text, Store.YES));
                } else {
                    copy.add(field);
                }
            }
        }
        return copy;
    }

    /**
     * Creates a {@link QueryParser} for a single default field.
     *
     * @param field    default field name
     * @param analyzer analyzer used to tokenize the query text
     * @return a new parser
     */
    public static QueryParser createCustomQueryParser(String field,
            Analyzer analyzer) {
        return LuceneManager.createQueryParser(field, analyzer);
    }

    /**
     * Creates a {@link QueryParser} that searches several fields.
     *
     * @param fields   field names to search
     * @param analyzer analyzer used to tokenize the query text
     * @return a new parser
     */
    public static QueryParser createMultiFieldQueryParser(String[] fields,
            Analyzer analyzer) {
        return LuceneManager.createMultiFieldQueryParser(fields, analyzer);
    }

    /**
     * Creates a text field that stores term vectors (with positions and
     * offsets indexed) so that the term vector can be read at query time.
     *
     * @param name  field name
     * @param value field text
     * @param store whether the value is stored; {@code Store.NO} disables
     *              storing, any other value (including {@code null}) stores it
     * @return the configured field
     */
    public static Field storeVectorTextField(String name, String value,
            Store store) {
        FieldType type = new FieldType();
        type.setStored(store != Store.NO);
        type.setStoreTermVectors(true);
        type.setIndexOptions(
                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        return new Field(name, value, type);
    }
}
package com.whf.pojo;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.CustomScoreProvider;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.search.Query;

/**
 * A {@link CustomScoreQuery} whose score for each hit is the number of
 * entries in the term vector of one field of that document.
 */
public class MyCountQuery extends CustomScoreQuery {

    /** Field whose term vector is counted; {@code null} when not given. */
    private final String field;

    /**
     * Score provider that counts term-vector entries. It is a static nested
     * class so that it does not keep a hidden reference to the query.
     */
    private static final class MyCountQueryScoreProvider
            extends CustomScoreProvider {

        private final String field;

        MyCountQueryScoreProvider(LeafReaderContext context, String field) {
            super(context);
            this.field = field;
        }

        /**
         * Returns the number of entries in the document's term vector for the
         * configured field.
         *
         * @param doc           document id within the current segment
         * @param subQueryScore score of the wrapped query (ignored)
         * @param valSrcScores  value-source scores (ignored)
         * @return the entry count, or {@code 0} when no field is configured or
         *         the document has no term vector for it
         */
        @Override
        public float customScore(int doc, float subQueryScore,
                float[] valSrcScores) throws IOException {
            if (field == null) {
                // No field configured: nothing to count.
                return 0f;
            }
            IndexReader reader = context.reader();
            Terms vector = reader.getTermVector(doc, field);
            if (vector == null) {
                return 0f;
            }
            int numTerms = 0;
            TermsEnum termsEnum = vector.iterator();
            while (termsEnum.next() != null) {
                numTerms++;
            }
            return (float) numTerms;
        }
    }

    /**
     * Creates a query without a field; every hit then scores {@code 0}.
     *
     * @param subQuery query selecting the documents
     */
    public MyCountQuery(Query subQuery) {
        super(subQuery);
        this.field = null;
    }

    /**
     * Creates a query that scores hits by the term-vector size of
     * {@code field}.
     *
     * @param subQuery query selecting the documents
     * @param field    field whose term vector is counted
     */
    public MyCountQuery(Query subQuery, String field) {
        super(subQuery);
        this.field = field;
    }

    @Override
    protected CustomScoreProvider getCustomScoreProvider(
            LeafReaderContext context) throws IOException {
        return new MyCountQueryScoreProvider(context, this.field);
    }
}
package com.whf.pojo; /** * @ClassName: HighlighterParam * @Description: 高亮器参数对象 * @author Lanxiaowei * @date 2014-3-30 下午12:22:08 */ public class HighlighterParam { /** 是否需要设置高亮 */ private boolean highlight; /** 需要设置高亮的属性名 */ private String fieldName; /** 高亮前缀 */ private String prefix; /** 高亮后缀 */ private String stuffix; /** 显示摘要最大长度 */ private int fragmenterLength; public boolean isHighlight() { return highlight; } public void setHighlight(boolean highlight) { this.highlight = highlight; } public String getFieldName() { return fieldName; } public void setFieldName(String fieldName) { this.fieldName = fieldName; } public String getPrefix() { return prefix; } public void setPrefix(String prefix) { this.prefix = prefix; } public String getStuffix() { return stuffix; } public void setStuffix(String stuffix) { this.stuffix = stuffix; } public int getFragmenterLength() { return fragmenterLength; } public void setFragmenterLength(int fragmenterLength) { this.fragmenterLength = fragmenterLength; } public HighlighterParam(boolean highlight, String fieldName, String prefix, String stuffix, int fragmenterLength) { this.highlight = highlight; this.fieldName = fieldName; this.prefix = prefix; this.stuffix = stuffix; this.fragmenterLength = fragmenterLength; } public HighlighterParam(boolean highlight, String fieldName, int fragmenterLength) { this.highlight = highlight; this.fieldName = fieldName; this.fragmenterLength = fragmenterLength; } public HighlighterParam(boolean highlight, String fieldName, String prefix, String stuffix) { this.highlight = highlight; this.fieldName = fieldName; this.prefix = prefix; this.stuffix = stuffix; } public HighlighterParam() { } }
package com.whf.pojo;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.CustomScoreProvider;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.search.Query;

/**
 * A {@link CustomScoreQuery} whose score for each hit is the number of
 * entries in the term vector of one field of that document.
 */
public class MyCountQuery extends CustomScoreQuery {

    /** Field whose term vector is counted; {@code null} when not given. */
    private final String field;

    /**
     * Score provider that counts term-vector entries. It is a static nested
     * class so that it does not keep a hidden reference to the query.
     */
    private static final class MyCountQueryScoreProvider
            extends CustomScoreProvider {

        private final String field;

        MyCountQueryScoreProvider(LeafReaderContext context, String field) {
            super(context);
            this.field = field;
        }

        /**
         * Returns the number of entries in the document's term vector for the
         * configured field.
         *
         * @param doc           document id within the current segment
         * @param subQueryScore score of the wrapped query (ignored)
         * @param valSrcScores  value-source scores (ignored)
         * @return the entry count, or {@code 0} when no field is configured or
         *         the document has no term vector for it
         */
        @Override
        public float customScore(int doc, float subQueryScore,
                float[] valSrcScores) throws IOException {
            if (field == null) {
                // No field configured: nothing to count.
                return 0f;
            }
            IndexReader reader = context.reader();
            Terms vector = reader.getTermVector(doc, field);
            if (vector == null) {
                return 0f;
            }
            int numTerms = 0;
            TermsEnum termsEnum = vector.iterator();
            while (termsEnum.next() != null) {
                numTerms++;
            }
            return (float) numTerms;
        }
    }

    /**
     * Creates a query without a field; every hit then scores {@code 0}.
     *
     * @param subQuery query selecting the documents
     */
    public MyCountQuery(Query subQuery) {
        super(subQuery);
        this.field = null;
    }

    /**
     * Creates a query that scores hits by the term-vector size of
     * {@code field}.
     *
     * @param subQuery query selecting the documents
     * @param field    field whose term vector is counted
     */
    public MyCountQuery(Query subQuery, String field) {
        super(subQuery);
        this.field = field;
    }

    @Override
    protected CustomScoreProvider getCustomScoreProvider(
            LeafReaderContext context) throws IOException {
        return new MyCountQueryScoreProvider(context, this.field);
    }
}
package com.whf.pojo;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.search.ScoreDoc;

/**
 * Paging descriptor: holds the current page, the page size, the total record
 * count, the items of the current page, the window of page numbers to display
 * and the "search after" marker used for Lucene deep paging.
 *
 * <p>Several getters normalize and store the value they return (for example
 * {@link #getPageSize()} replaces a non-positive size with 10).
 *
 * @param <T> element type of the page items
 */
public class Page<T> {
    /** Current page number (1-based). */
    private int currentPage;
    /** Number of items per page. */
    private int pageSize;
    /** Total number of records. */
    private int totalRecord;
    /** Total number of pages. */
    private int totalPage;
    /** Items of the current page. */
    private Collection<T> items;
    /** First displayed page index (0-based). */
    private int startIndex;
    /** Last displayed page index (0-based). */
    private int endIndex;
    /** Maximum number of page numbers shown in one group (e.g. 10). */
    private int groupSize;
    /** Number of page numbers shown to the left of the current page. */
    private int leftOffset = 5;
    /** Number of page numbers shown to the right of the current page. */
    private int rightOffset = 4;
    /** Page numbers currently displayed. */
    private String[] pageRange;
    /** Page data as Lucene documents. */
    private List<Document> docList;
    /** Last {@link ScoreDoc} of the previous page. */
    private ScoreDoc afterDoc;
    /** Document id of the last {@link ScoreDoc} of the previous page. */
    private int afterDocId;

    /** Creates an empty page descriptor. */
    public Page() {
    }

    /**
     * @param currentPage current page number (1-based)
     * @param pageSize    number of items per page
     */
    public Page(int currentPage, int pageSize) {
        this.currentPage = currentPage;
        this.pageSize = pageSize;
    }

    /**
     * @param currentPage current page number (1-based)
     * @param pageSize    number of items per page
     * @param items       items of the current page
     */
    public Page(int currentPage, int pageSize, Collection<T> items) {
        this.currentPage = currentPage;
        this.pageSize = pageSize;
        this.items = items;
    }

    /**
     * @param currentPage current page number (1-based)
     * @param pageSize    number of items per page
     * @param items       items of the current page
     * @param groupSize   maximum number of page numbers shown in one group
     */
    public Page(int currentPage, int pageSize, Collection<T> items,
            int groupSize) {
        this.currentPage = currentPage;
        this.pageSize = pageSize;
        this.items = items;
        this.groupSize = groupSize;
    }

    /**
     * @param currentPage current page number (1-based)
     * @param pageSize    number of items per page
     * @param groupSize   maximum number of page numbers shown in one group
     * @param afterDocId  document id of the last hit of the previous page
     */
    public Page(int currentPage, int pageSize, int groupSize, int afterDocId) {
        this.currentPage = currentPage;
        this.pageSize = pageSize;
        this.groupSize = groupSize;
        this.afterDocId = afterDocId;
    }

    /**
     * Recomputes {@code startIndex} and {@code endIndex}, the 0-based window
     * of page numbers to display around the current page.
     */
    public void setRangeIndex() {
        int groupSize = getGroupSize();
        int totalPage = getTotalPage();
        if (totalPage < 2) {
            startIndex = 0;
            endIndex = totalPage - startIndex;
            return;
        }
        // Also clamps the stored current page into [1, totalPage].
        int currentPage = getCurrentPage();
        if (groupSize >= totalPage) {
            // Everything fits into one group.
            startIndex = 0;
            endIndex = totalPage - 1;
            return;
        }
        int leftOffset = getLeftOffset();
        if (currentPage <= leftOffset) {
            // Too close to the beginning to centre: show the first group.
            startIndex = 0;
            endIndex = groupSize - 1;
            return;
        }
        // Derive the right offset from the group size, exactly like the left
        // offset, so that the window matches the configured group size.
        int rightOffset = getRightOffset();
        startIndex = currentPage - leftOffset - 1;
        if (currentPage + rightOffset > totalPage) {
            endIndex = totalPage - 1;
        } else {
            endIndex = currentPage + rightOffset - 1;
        }
    }

    /**
     * Returns the current page, clamped to at least 1 and, when the total
     * page count is positive, to at most that count. The clamped value is
     * stored.
     */
    public int getCurrentPage() {
        if (currentPage <= 0) {
            currentPage = 1;
        } else {
            int totalPage = getTotalPage();
            if (totalPage > 0 && currentPage > getTotalPage()) {
                currentPage = totalPage;
            }
        }
        return currentPage;
    }

    public void setCurrentPage(int currentPage) {
        this.currentPage = currentPage;
    }

    /** Returns the page size; a non-positive size is replaced by 10. */
    public int getPageSize() {
        if (pageSize <= 0) {
            pageSize = 10;
        }
        return pageSize;
    }

    public void setPageSize(int pageSize) {
        this.pageSize = pageSize;
    }

    public int getTotalRecord() {
        return totalRecord;
    }

    public void setTotalRecord(int totalRecord) {
        this.totalRecord = totalRecord;
    }

    /**
     * Returns the total page count, recomputed from the total record count
     * and the page size (rounded up) on every call.
     */
    public int getTotalPage() {
        int totalRecord = getTotalRecord();
        if (totalRecord == 0) {
            totalPage = 0;
        } else {
            int pageSize = getPageSize();
            totalPage = totalRecord % pageSize == 0
                    ? totalRecord / pageSize
                    : (totalRecord / pageSize) + 1;
        }
        return totalPage;
    }

    public void setTotalPage(int totalPage) {
        this.totalPage = totalPage;
    }

    public int getStartIndex() {
        return startIndex;
    }

    public void setStartIndex(int startIndex) {
        this.startIndex = startIndex;
    }

    public int getEndIndex() {
        return endIndex;
    }

    public void setEndIndex(int endIndex) {
        this.endIndex = endIndex;
    }

    /** Returns the group size; a non-positive size is replaced by 10. */
    public int getGroupSize() {
        if (groupSize <= 0) {
            groupSize = 10;
        }
        return groupSize;
    }

    public void setGroupSize(int groupSize) {
        this.groupSize = groupSize;
    }

    /** Returns the left offset, recomputed as half of the group size. */
    public int getLeftOffset() {
        leftOffset = getGroupSize() / 2;
        return leftOffset;
    }

    public void setLeftOffset(int leftOffset) {
        this.leftOffset = leftOffset;
    }

    /**
     * Returns the right offset, recomputed from the group size: half of it,
     * minus one when the group size is even.
     */
    public int getRightOffset() {
        int groupSize = getGroupSize();
        if (groupSize % 2 == 0) {
            rightOffset = (groupSize / 2) - 1;
        } else {
            rightOffset = groupSize / 2;
        }
        return rightOffset;
    }

    public void setRightOffset(int rightOffset) {
        this.rightOffset = rightOffset;
    }

    /**
     * Returns the 1-based position of the centre of a group, or {@code -1}
     * when all pages fit into a single group.
     */
    public int getMiddleOffset() {
        int groupSize = getGroupSize();
        int totalPage = getTotalPage();
        if (groupSize >= totalPage) {
            return -1;
        }
        return getLeftOffset() + 1;
    }

    /**
     * Recomputes the display window and returns the page numbers in it as
     * strings (1-based).
     */
    public String[] getPageRange() {
        setRangeIndex();
        int size = endIndex - startIndex + 1;
        if (size <= 0) {
            return new String[0];
        }
        if (totalPage == 1) {
            return new String[] { "1" };
        }
        pageRange = new String[size];
        for (int i = 0; i < size; i++) {
            pageRange[i] = (startIndex + i + 1) + "";
        }
        return pageRange;
    }

    public void setPageRange(String[] pageRange) {
        this.pageRange = pageRange;
    }

    public Collection<T> getItems() {
        return items;
    }

    public void setItems(Collection<T> items) {
        this.items = items;
    }

    public List<Document> getDocList() {
        return docList;
    }

    public void setDocList(List<Document> docList) {
        this.docList = docList;
    }

    /**
     * Returns the "search after" marker. When none has been set, one is
     * synthesized from {@code afterDocId} with score {@code 1.0f}.
     *
     * <p>NOTE(review): for a fresh page this yields a marker for document 0
     * rather than {@code null}; confirm that this is what callers of
     * {@code IndexSearcher.searchAfter} expect for the first page.
     */
    public ScoreDoc getAfterDoc() {
        setAfterDocId(afterDocId);
        return afterDoc;
    }

    public void setAfterDoc(ScoreDoc afterDoc) {
        this.afterDoc = afterDoc;
    }

    public int getAfterDocId() {
        return afterDocId;
    }

    /**
     * Stores the document id of the last hit of the previous page and, when
     * no marker exists yet, creates one with score {@code 1.0f}.
     */
    public void setAfterDocId(int afterDocId) {
        this.afterDocId = afterDocId;
        if (null == afterDoc) {
            this.afterDoc = new ScoreDoc(afterDocId, 1.0f);
        }
    }

    /** Small demo printing the page-number window for every page. */
    public static void main(String[] args) {
        Collection<Integer> items = new ArrayList<Integer>();
        int totalRecord = 201;
        for (int i = 0; i < totalRecord; i++) {
            items.add(Integer.valueOf(i));
        }
        Page<Integer> page = new Page<Integer>(1, 10, items, 10);
        page.setTotalRecord(totalRecord);
        int totalPage = page.getTotalPage();
        for (int i = 0; i < totalPage; i++) {
            page.setCurrentPage(i + 1);
            String[] pageRange = page.getPageRange();
            System.out.println("当前第" + page.currentPage + "页");
            for (int j = 0; j < pageRange.length; j++) {
                System.out.print(pageRange[j] + " ");
            }
            System.out.println("\n");
        }
    }
}
package com.whf.demo;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.search.suggest.document.SuggestField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import com.whf.pojo.MyCountQuery;

public class Main {
    public static final String path = "F://index_data";

    /**
     * Demo entry point. The whole body is currently commented out; the
     * disabled code indexes two sample documents with tuned writer settings
     * and then searches them with a MyCountQuery.
     *
     * @param args unused
     * @throws IOException
     * @throws ParseException
     */
    public static void main(String[] args) throws IOException, ParseException {
        // Directory directory = LuceneUtil.openFSDirectory(path);
        // Analyzer analyzer = new StandardAnalyzer();
        // IndexWriterConfig config = new IndexWriterConfig(
        // analyzer);
        // // Noticeably improves throughput; takes effect on the next insert; default is 16 MB
        // config.setRAMBufferSizeMB(256);
        // // Number of documents buffered in memory before they are written to disk
        // config.setMaxBufferedDocs(200);
        // IndexWriter writer = LuceneUtil.getIndexWrtier(directory, config);
        // try {
        //
        // Document doc1 = new Document();
        // doc1.add(LuceneUtil.storeVectorTextField("name", "this is a demo", Store.YES));
        // doc1.add(LuceneUtil.storeVectorTextField("desc", "whf", Store.YES));
        //
        // Document doc2 = new Document();
        // doc2.add(LuceneUtil.storeVectorTextField("name", "this is a test", Store.YES));
        // doc2.add(LuceneUtil.storeVectorTextField("desc", "smx", Store.YES));
        //
        //// writer.updateDocument(new Term("name", "very"), doc);
        // writer.addDocument(doc1);
        // writer.addDocument(doc2);
        // // Force-merge segments to speed up searching
        // writer.forceMerge(5);
        // } catch (Exception e) {
        // e.printStackTrace();
        // } finally {
        // writer.commit();
        // writer.close();
        // }
        // IndexReader reader = LuceneUtil.getIndexReader(directory);
        // IndexSearcher searcher = LuceneUtil.getIndexSearcher(reader);
        // QueryParser parser = new MultiFieldQueryParser(new String[] { "name", "desc" }, analyzer);
        // Query query = new MatchAllDocsQuery();
        // Query myquery=new MyCountQuery(query, "name");
        // TopDocs tops = searcher.search(myquery, 100);
        // System.out.println("*****************");
        //// System.out.println(tops.totalHits);
        // ScoreDoc scoreDoc[] = tops.scoreDocs;
        // for (int i = 0; i < scoreDoc.length; i++) {
        // System.out.println(scoreDoc[i].doc);
        //// Document doc = searcher.doc(scoreDoc[i].doc);
        //// System.out.println(scoreDoc[i].score);
        //// System.out.println(doc.getFields());
        //// for(IndexableField field:doc){
        //// System.out.println(field.name()+" "+field.stringValue());
        //// }
        // }
    }
    // Query types worth exploring: TermQuery, BooleanQuery, PhraseQuery, PrefixQuery,
    // RangeQuery, MultiTermQuery, FilteredQuery, SpanQuery
}
相关文章推荐
- Sublime Text3注册及汉化(支持Windows、MAC OS)
- 实现让元素看不到的几种方式
- JSON 之 SuperObject(9): TSuperType
- huhx的android封神之路-------->Activity的生命周期
- Android高性能ORM数据库DBFlow入门
- ViewController的lifecycle和autolayout
- Quick社区版第一个稳定版,Quick-Cocos2dx-Community 3.6 release发布!
- 2016蓝桥杯假期任务之《买不到的数目》
- G-数列有序!
- JSON 之 SuperObject(7): 可以省略的双引号
- [Unity游戏开发]向量在游戏开发中的应用(一)
- Codeforces 626C Block Towers「贪心」「二分」「数学规律」
- Web 开发工具类(3): JsonUtils
- 关于多态
- 怎么解决eclipse报PermGen space异常的问题
- JSON 之 SuperObject(6): 方法
- HDU 2544 最短路
- 爬虫模拟请求
- Android Studio中的项目 和引用Library中的minSdkVersion要 一致。
- 轻量级文本搜索引擎的后台设计、实现与优化