lucene + spring
2014-03-20 13:54
232 查看
目录结构
![](http://img.blog.csdn.net/20140319153502796?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvaHd0XzIxMQ==/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/SouthEast)
一,lucene的索引工具类
[java] view
plaincopy
![](https://code.csdn.net/assets/CODE_ico.png)
![](https://code.csdn.net/assets/ico_fork.svg)
package com.hwt.lucene.index;
import java.io.File;
import java.io.IOException;
import java.util.List;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
/**
 * Static helpers for working with the Lucene index stored under {@code indexPath}:
 * obtaining readers, writers and searchers, and adding, updating, merging and
 * deleting documents.
 *
 * <p>Failures are reported with {@code printStackTrace()} and signalled to the caller
 * by a {@code null} return value (factory methods) or by silently doing nothing
 * (write operations).
 *
 * @author 黄文韬
 */
public class IndexUtils {
    // Shared Paoding analyzer instance, used for indexing and exposed through getAnalyzer().
    private static final Analyzer ANALYZER = new PaodingAnalyzer();
    // Location of the index; a relative path, so it is resolved against the working directory.
    private static final String indexPath = "WebRoot/lucene/index";

    /**
     * Returns the shared Paoding analyzer.
     *
     * @return the analyzer instance used by this class
     */
    public static Analyzer getAnalyzer() {
        return ANALYZER;
    }

    /**
     * Opens a file-system directory for the given path.
     *
     * @param path path of the index directory (may be relative)
     * @return the opened directory, or {@code null} if it could not be opened
     */
    public static Directory getDirectory(String path) {
        try {
            return FSDirectory.open(new File(path));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Opens a new reader on the index. The caller is responsible for closing it.
     *
     * @return a new reader, or {@code null} if the index could not be opened
     */
    public static IndexReader getIndexReader() {
        Directory directory = getDirectory(indexPath);
        if (directory == null) {
            // The failure was already reported by getDirectory.
            return null;
        }
        try {
            return IndexReader.open(directory);
        } catch (IOException e) {
            // Also covers CorruptIndexException, which is an IOException.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Opens a new writer on the index. The caller is responsible for closing it.
     *
     * @return a new writer, or {@code null} if it could not be created
     *         (for example because the index is locked by another writer)
     */
    public static IndexWriter getIndexWriter() {
        Directory directory = getDirectory(indexPath);
        if (directory == null) {
            return null;
        }
        try {
            return new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_36, ANALYZER));
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Creates a searcher on top of a freshly opened reader.
     *
     * @return a new searcher, or {@code null} if the index could not be opened
     */
    public static IndexSearcher getIndexSearcher() {
        IndexReader reader = getIndexReader();
        if (reader == null) {
            return null;
        }
        try {
            return new IndexSearcher(reader);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Adds one document, built from the given field descriptions, to the index.
     *
     * @param result descriptions of the fields of the new document
     */
    public static void createIndex(List<IndexField> result) {
        IndexWriter indexWriter = getIndexWriter();
        if (indexWriter == null) {
            // Nothing to write to; the cause was already reported.
            return;
        }
        try {
            indexWriter.addDocument(toDocument(result));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(indexWriter);
        }
    }

    /**
     * Merges the index down to a single segment.
     */
    public static void mergeIndex() {
        IndexWriter indexWriter = getIndexWriter();
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.forceMerge(1);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(indexWriter);
        }
    }

    /**
     * Replaces every document matching {@code term} with one new document, then
     * merges the index down to a single segment.
     *
     * @param fields descriptions of the fields of the replacement document
     * @param term   term identifying the document(s) to replace
     */
    public static void updateIndex(List<IndexField> fields, Term term) {
        IndexWriter indexWriter = getIndexWriter();
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.updateDocument(term, toDocument(fields), ANALYZER);
            indexWriter.forceMerge(1);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(indexWriter);
        }
    }

    /**
     * Deletes every document in the index.
     */
    public static void deleteAll() {
        IndexWriter writer = getIndexWriter();
        if (writer == null) {
            return;
        }
        try {
            writer.deleteAll();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Deletes every document matching {@code term}, then merges the index down to a
     * single segment.
     *
     * @param term term identifying the document(s) to delete
     */
    public static void delete(Term term) {
        // No reader is opened here: the deletion goes through the writer only.
        IndexWriter writer = getIndexWriter();
        if (writer == null) {
            return;
        }
        try {
            writer.deleteDocuments(term);
            writer.forceMerge(1);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Builds a Lucene document with one field per description, using each
     * description's own store and index settings.
     */
    private static Document toDocument(List<IndexField> fields) {
        Document doc = new Document();
        for (IndexField field : fields) {
            doc.add(new Field(field.getFieldName(), field.getFieldValue(),
                    field.getFieldStore(), field.getFieldAnalyzed()));
        }
        return doc;
    }

    /**
     * Closes the writer if there is one, reporting (not propagating) any failure.
     */
    private static void closeQuietly(IndexWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
二,文件类型的搜索
[java] view
plaincopy
![](https://code.csdn.net/assets/CODE_ico.png)
![](https://code.csdn.net/assets/ico_fork.svg)
package com.hwt.lucene.index;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
/**
 * Builds Lucene documents with a "name" field and a "content" field, either from a
 * file on disk or from two strings. Both fields are stored and analyzed.
 *
 * @author 黄文韬
 */
public class FileDocument {
    /**
     * Converts a file into a document: the file name becomes the "name" field and
     * the file text becomes the "content" field.
     *
     * @param file the file to convert
     * @return the new document
     */
    public Document fileToDocument(File file) {
        return stringToDocumet(file.getName(), this.readFileRetStr(file));
    }

    /**
     * Builds a document from a name and a content string.
     *
     * @param name    value of the "name" field
     * @param content value of the "content" field
     * @return the new document
     */
    public Document stringToDocumet(String name, String content) {
        Document document = new Document();
        document.add(new Field("name", name, Store.YES, Index.ANALYZED));
        document.add(new Field("content", content, Store.YES, Index.ANALYZED));
        return document;
    }

    /**
     * Reads a file as UTF-8 text.
     *
     * <p>Lines are appended one after another without their line terminators, so
     * the last word of a line is directly followed by the first word of the next.
     * If the file cannot be opened or read, the error is reported and whatever was
     * read so far (possibly the empty string) is returned.
     *
     * @param file the file to read
     * @return the file text with line terminators removed
     */
    public String readFileRetStr(File file) {
        StringBuilder text = new StringBuilder();
        FileInputStream fStream = null;
        try {
            fStream = new FileInputStream(file);
            BufferedReader bReader = new BufferedReader(new InputStreamReader(fStream, "UTF-8"));
            String line;
            while ((line = bReader.readLine()) != null) {
                text.append(line);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // The stream is still null when the file could not be opened at all.
            if (fStream != null) {
                try {
                    fStream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return text.toString();
    }
}
三,封装索引字段类
[java] view
plaincopy
![](https://code.csdn.net/assets/CODE_ico.png)
![](https://code.csdn.net/assets/ico_fork.svg)
package com.hwt.lucene.index;
import java.io.Serializable;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
/**
 * Description of one field of a document to be indexed: its name, its value and
 * how Lucene should store and index it.
 *
 * @author hwt
 */
public class IndexField implements Serializable {
    // Explicit version id so the serialized form does not depend on compiler details.
    private static final long serialVersionUID = 1L;

    private String fieldName;
    private String fieldValue;
    // Whether the value is stored: Store.YES / Store.NO
    private Store fieldStore;
    // Whether the value is analyzed:
    // Index.ANALYZED / Index.NOT_ANALYZED / Index.NO / Index.ANALYZED_NO_NORMS
    private Index fieldAnalyzed;

    /**
     * Creates an empty description; populate it through the setters.
     */
    public IndexField() {
    }

    /**
     * Creates a fully populated description.
     *
     * @param fieldName     name of the field
     * @param fieldValue    value of the field
     * @param fieldStore    whether the value is stored
     * @param fieldAnalyzed whether and how the value is indexed
     */
    public IndexField(String fieldName, String fieldValue, Store fieldStore, Index fieldAnalyzed) {
        this.fieldName = fieldName;
        this.fieldValue = fieldValue;
        this.fieldStore = fieldStore;
        this.fieldAnalyzed = fieldAnalyzed;
    }

    /** @return name of the field */
    public String getFieldName() {
        return fieldName;
    }

    /** @param fieldName name of the field */
    public void setFieldName(String fieldName) {
        this.fieldName = fieldName;
    }

    /** @return value of the field */
    public String getFieldValue() {
        return fieldValue;
    }

    /** @param fieldValue value of the field */
    public void setFieldValue(String fieldValue) {
        this.fieldValue = fieldValue;
    }

    /** @return whether the value is stored */
    public Store getFieldStore() {
        return fieldStore;
    }

    /** @param fieldStore whether the value is stored */
    public void setFieldStore(Store fieldStore) {
        this.fieldStore = fieldStore;
    }

    /** @return whether and how the value is indexed */
    public Index getFieldAnalyzed() {
        return fieldAnalyzed;
    }

    /** @param fieldAnalyzed whether and how the value is indexed */
    public void setFieldAnalyzed(Index fieldAnalyzed) {
        this.fieldAnalyzed = fieldAnalyzed;
    }

    @Override
    public String toString() {
        return "IndexField[name=" + fieldName + ", value=" + fieldValue
                + ", store=" + fieldStore + ", index=" + fieldAnalyzed + "]";
    }
}
四,分页缓存类
[java] view
plaincopy
![](https://code.csdn.net/assets/CODE_ico.png)
![](https://code.csdn.net/assets/ico_fork.svg)
package com.hwt.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
/**
 * Paged search with a small in-memory cache of documents around the requested page.
 *
 * <p>A search loads up to {@code cacheSize} documents starting two pages before the
 * requested page (when possible); later requests for pages inside that window are
 * answered from the cache without touching the index.
 *
 * <p>NOTE(review): the cache is keyed by page number only. Calling {@link #search}
 * with a different query or sort while the cache is filled returns documents of the
 * previous query; call {@link #refleshCache()} before switching queries.
 */
public class CachePage {
    private static final Logger LOGGER = Logger.getLogger(CachePage.class);
    private int pageStart = 1; // 1-based number of the first page held in the cache
    private int pageSize = 15; // number of documents per page
    private int pageNum = 0; // total number of pages found by the last search
    private int totalNum = 0; // total number of hits found by the last search
    private int cacheSize = 100; // maximum number of cached documents
    private List<Document> cacheList = new ArrayList<Document>(); // cached documents, in result order

    /**
     * Creates a pager.
     *
     * @param pageSize  documents per page; {@code null} or non-positive keeps the default (15)
     * @param cacheSize maximum number of cached documents; {@code null} or non-positive
     *                  keeps the default (100)
     */
    public CachePage(Integer pageSize, Integer cacheSize) {
        if (pageSize != null && pageSize > 0) {
            this.pageSize = pageSize;
        }
        if (cacheSize != null && cacheSize > 0) {
            this.cacheSize = cacheSize;
        }
    }

    /**
     * Tells whether a page can be served from the cache.
     *
     * @param page 1-based page number; values below 1 are treated as 1
     * @return {@code true} if every document of that page is cached
     */
    public boolean inCache(int page) {
        int cacheNum = cacheList.size();
        if (cacheNum == 0) {
            return false;
        }
        if (page <= 0) {
            page = 1;
        }
        if (page < pageStart) {
            return false;
        }
        // long arithmetic: page numbers come from callers and may be arbitrarily large
        long offset = (long) (page - pageStart) * pageSize;
        if (offset >= cacheNum) {
            return false;
        }
        // A page that is only partly cached still counts when the cache already
        // contains the very last hit, i.e. the page is the short final page.
        boolean wholePageCached = offset + pageSize <= cacheNum;
        boolean cacheReachesLastHit = (long) (pageStart - 1) * pageSize + cacheNum >= totalNum;
        return wholePageCached || cacheReachesLastHit;
    }

    /**
     * Empties the cache.
     */
    public void refleshCache() {
        cacheList.clear();
    }

    /**
     * Appends a document to the cache unless the cache is already full.
     *
     * @param doc the document to cache
     */
    public void addCache(Document doc) {
        if (this.cacheList.size() < cacheSize) {
            this.cacheList.add(doc);
        } else {
            LOGGER.info("缓存池已满");
        }
    }

    /**
     * Builds the result for a page from the cached documents.
     *
     * @param page 1-based page number
     * @return a map with the keys "currentPage", "totalNum", "pageNum" and "list"
     *         (a {@code List<Document>}); the list is empty when the page lies
     *         outside the cached window
     */
    @SuppressWarnings("rawtypes")
    public Map readCache(int page) {
        int cached = cacheList.size();
        long offset = (long) (page - pageStart) * pageSize;
        List<Document> cacheRs = new ArrayList<Document>();
        if (offset >= 0 && offset < cached) {
            int start = (int) offset;
            int end = (int) Math.min(offset + pageSize, (long) cached);
            cacheRs.addAll(cacheList.subList(start, end));
        }
        Map<String, Object> resultMap = new HashMap<String, Object>();
        resultMap.put("currentPage", page); // requested page
        resultMap.put("totalNum", totalNum); // total number of hits
        resultMap.put("pageNum", pageNum); // total number of pages
        resultMap.put("list", cacheRs);
        return resultMap;
    }

    /**
     * Returns one page of results, from the cache when possible and from the index
     * otherwise. A page beyond the last one is replaced by the last page.
     *
     * @param query the query to run
     * @param sort  the sort order
     * @param page  1-based page number; values below 1 are treated as 1
     * @return the result map described at {@link #readCache(int)}, or {@code null}
     *         if the index could not be opened or searched
     */
    @SuppressWarnings("rawtypes")
    public Map search(Query query, Sort sort, int page) {
        if (page <= 0) {
            page = 1;
        }
        if (inCache(page)) {
            return readCache(page);
        }
        IndexSearcher searcher = IndexUtils.getIndexSearcher();
        if (searcher == null) {
            return null;
        }
        try {
            // The cache window never starts after the requested page, so this many
            // hits always cover it; capped by the index size to keep the request sane.
            long wanted = (long) (page - 1) * pageSize + cacheSize;
            int querySize = (int) Math.min(wanted, (long) Math.max(1, searcher.maxDoc()));
            TopDocs topDocs = searcher.search(query, querySize, sort);
            int hits = topDocs.totalHits;
            int pages = hits % pageSize == 0 ? hits / pageSize : hits / pageSize + 1;
            if (page > pages) {
                page = pages;
            }
            if (page < 1) {
                // no hits at all: still report page 1 (with an empty list)
                page = 1;
            }
            ScoreDoc[] docs = topDocs.scoreDocs;

            // Start the window two pages before the requested one where possible.
            int startPage;
            if (page < 3) {
                startPage = 1;
            } else if (page > pages - 2) {
                startPage = Math.max(1, pages - 4);
            } else {
                startPage = page - 2;
            }
            // A cache too small to reach the requested page from there starts at the page itself.
            if ((long) (page - startPage + 1) * pageSize > cacheSize) {
                startPage = page;
            }

            refleshCache();
            int startSize = (startPage - 1) * pageSize;
            int endSize = Math.min(Math.min(startSize + cacheSize, hits), docs.length);
            for (int i = startSize; i < endSize; i++) {
                addCache(searcher.doc(docs[i].doc));
            }
            this.pageNum = pages;
            this.totalNum = hits;
            this.pageStart = startPage;
            return readCache(page);
        } catch (IOException e) {
            LOGGER.error("search failed", e);
            return null;
        } finally {
            closeSearcher(searcher);
        }
    }

    /**
     * Closes the searcher and the reader it was built on; the cached documents were
     * already loaded, so they stay usable.
     */
    private static void closeSearcher(IndexSearcher searcher) {
        try {
            searcher.close();
            // A searcher built from a reader does not close that reader itself.
            searcher.getIndexReader().close();
        } catch (IOException e) {
            LOGGER.error("could not close the index searcher", e);
        }
    }

    public Integer getPageSize() {
        return pageSize;
    }

    /**
     * Changes the page size and empties the cache, whose page boundaries would no
     * longer match. {@code null} and non-positive values are ignored.
     */
    public void setPageSize(Integer pageSize) {
        if (pageSize != null && pageSize > 0) {
            this.pageSize = pageSize;
            refleshCache();
        }
    }

    public Integer getPageStart() {
        return pageStart;
    }

    /** Sets the number of the first cached page; {@code null} is ignored. */
    public void setPageStart(Integer pageStart) {
        if (pageStart != null) {
            this.pageStart = pageStart;
        }
    }

    public Integer getCacheSize() {
        return cacheSize;
    }

    /** Sets the cache capacity; {@code null} and non-positive values are ignored. */
    public void setCacheSize(Integer cacheSize) {
        if (cacheSize != null && cacheSize > 0) {
            this.cacheSize = cacheSize;
        }
    }

    public List<Document> getCacheList() {
        return cacheList;
    }

    /** Replaces the cached documents; {@code null} empties the cache. */
    public void setCacheList(List<Document> cacheList) {
        this.cacheList = cacheList != null ? cacheList : new ArrayList<Document>();
    }
}
测试类:
[java] view
plaincopy
![](https://code.csdn.net/assets/CODE_ico.png)
![](https://code.csdn.net/assets/ico_fork.svg)
package test;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.print.Doc;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import com.hwt.lucene.index.CachePage;
import com.hwt.lucene.index.IndexField;
import com.hwt.lucene.index.IndexUtils;
/**
 * Manual demo: builds the field descriptions of a sample document, then runs a
 * two-clause query through {@link CachePage} and prints two result pages.
 */
public class Test {
    public static void main(String[] args) throws IOException, ParseException {
        // Sample document; only used by the index-maintenance examples below.
        List<IndexField> fieldIndexs2 = new ArrayList<IndexField>();
        fieldIndexs2.add(field("title", "美国攻打伊朗", Store.YES, Index.ANALYZED));
        fieldIndexs2.add(field("content", "美国派兵3333,航母出发了,中国航公出发", Store.YES, Index.ANALYZED));
        fieldIndexs2.add(field("Id", "12", Store.YES, Index.NOT_ANALYZED));

        // Create the document in the index:
        // IndexUtils.createIndex(fieldIndexs2);
        // Delete by term:
        // IndexUtils.delete(new Term("Id","2"));
        // Replace by term:
        // IndexUtils.updateIndex(fieldIndexs2, new Term("Id","2"));
        // Delete everything:
        // IndexUtils.deleteAll();

        Analyzer analyzer = IndexUtils.getAnalyzer();
        QueryParser titleParser = new QueryParser(Version.LUCENE_36, "title", analyzer);
        QueryParser contentParser = new QueryParser(Version.LUCENE_36, "content", analyzer);
        // The title clause is required, the content clause is optional.
        BooleanQuery query = new BooleanQuery();
        query.add(titleParser.parse("美国"), Occur.MUST);
        query.add(contentParser.parse("美国"), Occur.SHOULD);

        // Sort: field name, field type, descending flag (false, the default, is ascending).
        Sort sort = new Sort(new SortField("Id", SortField.INT, true));
        // Sorting on several fields:
        // Sort sort = new Sort(new SortField[]{new SortField("Id",SortField.INT, true),
        // new SortField("title",SortField.INT, true)});

        CachePage cachePage = new CachePage(1, 100);
        printPage(cachePage, cachePage.search(query, sort, 1));
        System.out.println("+++++++++++++++++++");
        // Empty the cache so the next page is read from the index again.
        cachePage.refleshCache();
        printPage(cachePage, cachePage.search(query, sort, 4));
    }

    /** Creates a field description from its four properties. */
    private static IndexField field(String name, String value, Store store, Index index) {
        IndexField indexField = new IndexField();
        indexField.setFieldName(name);
        indexField.setFieldValue(value);
        indexField.setFieldStore(store);
        indexField.setFieldAnalyzed(index);
        return indexField;
    }

    /** Prints the paging figures and the Id, title and content of every document on the page. */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static void printPage(CachePage cachePage, Map map) {
        if (map == null) {
            // search() returns null when the index could not be searched
            System.out.println("搜索失败");
            return;
        }
        System.out.println("起始页:" + cachePage.getPageStart());
        System.out.println("总页数:" + map.get("pageNum"));
        System.out.println("总条数:" + map.get("totalNum"));
        List<Document> docs = (List<Document>) map.get("list");
        for (Document document : docs) {
            System.out.println(document.get("Id"));
            System.out.println(document.get("title"));
            System.out.println(document.get("content"));
        }
    }
}
一,lucene的索引工具类
[java] view
plaincopy
![](https://code.csdn.net/assets/CODE_ico.png)
package com.hwt.lucene.index;
import java.io.File;
import java.io.IOException;
import java.util.List;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
/**
 * Static helpers for working with the Lucene index stored under {@code indexPath}:
 * obtaining readers, writers and searchers, and adding, updating, merging and
 * deleting documents.
 *
 * <p>Failures are reported with {@code printStackTrace()} and signalled to the caller
 * by a {@code null} return value (factory methods) or by silently doing nothing
 * (write operations).
 *
 * @author 黄文韬
 */
public class IndexUtils {
    // Shared Paoding analyzer instance, used for indexing and exposed through getAnalyzer().
    private static final Analyzer ANALYZER = new PaodingAnalyzer();
    // Location of the index; a relative path, so it is resolved against the working directory.
    private static final String indexPath = "WebRoot/lucene/index";

    /**
     * Returns the shared Paoding analyzer.
     *
     * @return the analyzer instance used by this class
     */
    public static Analyzer getAnalyzer() {
        return ANALYZER;
    }

    /**
     * Opens a file-system directory for the given path.
     *
     * @param path path of the index directory (may be relative)
     * @return the opened directory, or {@code null} if it could not be opened
     */
    public static Directory getDirectory(String path) {
        try {
            return FSDirectory.open(new File(path));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Opens a new reader on the index. The caller is responsible for closing it.
     *
     * @return a new reader, or {@code null} if the index could not be opened
     */
    public static IndexReader getIndexReader() {
        Directory directory = getDirectory(indexPath);
        if (directory == null) {
            // The failure was already reported by getDirectory.
            return null;
        }
        try {
            return IndexReader.open(directory);
        } catch (IOException e) {
            // Also covers CorruptIndexException, which is an IOException.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Opens a new writer on the index. The caller is responsible for closing it.
     *
     * @return a new writer, or {@code null} if it could not be created
     *         (for example because the index is locked by another writer)
     */
    public static IndexWriter getIndexWriter() {
        Directory directory = getDirectory(indexPath);
        if (directory == null) {
            return null;
        }
        try {
            return new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_36, ANALYZER));
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Creates a searcher on top of a freshly opened reader.
     *
     * @return a new searcher, or {@code null} if the index could not be opened
     */
    public static IndexSearcher getIndexSearcher() {
        IndexReader reader = getIndexReader();
        if (reader == null) {
            return null;
        }
        try {
            return new IndexSearcher(reader);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Adds one document, built from the given field descriptions, to the index.
     *
     * @param result descriptions of the fields of the new document
     */
    public static void createIndex(List<IndexField> result) {
        IndexWriter indexWriter = getIndexWriter();
        if (indexWriter == null) {
            // Nothing to write to; the cause was already reported.
            return;
        }
        try {
            indexWriter.addDocument(toDocument(result));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(indexWriter);
        }
    }

    /**
     * Merges the index down to a single segment.
     */
    public static void mergeIndex() {
        IndexWriter indexWriter = getIndexWriter();
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.forceMerge(1);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(indexWriter);
        }
    }

    /**
     * Replaces every document matching {@code term} with one new document, then
     * merges the index down to a single segment.
     *
     * @param fields descriptions of the fields of the replacement document
     * @param term   term identifying the document(s) to replace
     */
    public static void updateIndex(List<IndexField> fields, Term term) {
        IndexWriter indexWriter = getIndexWriter();
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.updateDocument(term, toDocument(fields), ANALYZER);
            indexWriter.forceMerge(1);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(indexWriter);
        }
    }

    /**
     * Deletes every document in the index.
     */
    public static void deleteAll() {
        IndexWriter writer = getIndexWriter();
        if (writer == null) {
            return;
        }
        try {
            writer.deleteAll();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Deletes every document matching {@code term}, then merges the index down to a
     * single segment.
     *
     * @param term term identifying the document(s) to delete
     */
    public static void delete(Term term) {
        // No reader is opened here: the deletion goes through the writer only.
        IndexWriter writer = getIndexWriter();
        if (writer == null) {
            return;
        }
        try {
            writer.deleteDocuments(term);
            writer.forceMerge(1);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Builds a Lucene document with one field per description, using each
     * description's own store and index settings.
     */
    private static Document toDocument(List<IndexField> fields) {
        Document doc = new Document();
        for (IndexField field : fields) {
            doc.add(new Field(field.getFieldName(), field.getFieldValue(),
                    field.getFieldStore(), field.getFieldAnalyzed()));
        }
        return doc;
    }

    /**
     * Closes the writer if there is one, reporting (not propagating) any failure.
     */
    private static void closeQuietly(IndexWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
二,文件类型的搜索
[java] view
plaincopy
![](https://code.csdn.net/assets/CODE_ico.png)
package com.hwt.lucene.index;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
/**
 * Builds Lucene documents with a "name" field and a "content" field, either from a
 * file on disk or from two strings. Both fields are stored and analyzed.
 *
 * @author 黄文韬
 */
public class FileDocument {
    /**
     * Converts a file into a document: the file name becomes the "name" field and
     * the file text becomes the "content" field.
     *
     * @param file the file to convert
     * @return the new document
     */
    public Document fileToDocument(File file) {
        return stringToDocumet(file.getName(), this.readFileRetStr(file));
    }

    /**
     * Builds a document from a name and a content string.
     *
     * @param name    value of the "name" field
     * @param content value of the "content" field
     * @return the new document
     */
    public Document stringToDocumet(String name, String content) {
        Document document = new Document();
        document.add(new Field("name", name, Store.YES, Index.ANALYZED));
        document.add(new Field("content", content, Store.YES, Index.ANALYZED));
        return document;
    }

    /**
     * Reads a file as UTF-8 text.
     *
     * <p>Lines are appended one after another without their line terminators, so
     * the last word of a line is directly followed by the first word of the next.
     * If the file cannot be opened or read, the error is reported and whatever was
     * read so far (possibly the empty string) is returned.
     *
     * @param file the file to read
     * @return the file text with line terminators removed
     */
    public String readFileRetStr(File file) {
        StringBuilder text = new StringBuilder();
        FileInputStream fStream = null;
        try {
            fStream = new FileInputStream(file);
            BufferedReader bReader = new BufferedReader(new InputStreamReader(fStream, "UTF-8"));
            String line;
            while ((line = bReader.readLine()) != null) {
                text.append(line);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // The stream is still null when the file could not be opened at all.
            if (fStream != null) {
                try {
                    fStream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return text.toString();
    }
}
三,封装索引字段类
[java] view
plaincopy
![](https://code.csdn.net/assets/CODE_ico.png)
package com.hwt.lucene.index;
import java.io.Serializable;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
/**
 * Description of one field of a document to be indexed: its name, its value and
 * how Lucene should store and index it.
 *
 * @author hwt
 */
public class IndexField implements Serializable {
    // Explicit version id so the serialized form does not depend on compiler details.
    private static final long serialVersionUID = 1L;

    private String fieldName;
    private String fieldValue;
    // Whether the value is stored: Store.YES / Store.NO
    private Store fieldStore;
    // Whether the value is analyzed:
    // Index.ANALYZED / Index.NOT_ANALYZED / Index.NO / Index.ANALYZED_NO_NORMS
    private Index fieldAnalyzed;

    /**
     * Creates an empty description; populate it through the setters.
     */
    public IndexField() {
    }

    /**
     * Creates a fully populated description.
     *
     * @param fieldName     name of the field
     * @param fieldValue    value of the field
     * @param fieldStore    whether the value is stored
     * @param fieldAnalyzed whether and how the value is indexed
     */
    public IndexField(String fieldName, String fieldValue, Store fieldStore, Index fieldAnalyzed) {
        this.fieldName = fieldName;
        this.fieldValue = fieldValue;
        this.fieldStore = fieldStore;
        this.fieldAnalyzed = fieldAnalyzed;
    }

    /** @return name of the field */
    public String getFieldName() {
        return fieldName;
    }

    /** @param fieldName name of the field */
    public void setFieldName(String fieldName) {
        this.fieldName = fieldName;
    }

    /** @return value of the field */
    public String getFieldValue() {
        return fieldValue;
    }

    /** @param fieldValue value of the field */
    public void setFieldValue(String fieldValue) {
        this.fieldValue = fieldValue;
    }

    /** @return whether the value is stored */
    public Store getFieldStore() {
        return fieldStore;
    }

    /** @param fieldStore whether the value is stored */
    public void setFieldStore(Store fieldStore) {
        this.fieldStore = fieldStore;
    }

    /** @return whether and how the value is indexed */
    public Index getFieldAnalyzed() {
        return fieldAnalyzed;
    }

    /** @param fieldAnalyzed whether and how the value is indexed */
    public void setFieldAnalyzed(Index fieldAnalyzed) {
        this.fieldAnalyzed = fieldAnalyzed;
    }

    @Override
    public String toString() {
        return "IndexField[name=" + fieldName + ", value=" + fieldValue
                + ", store=" + fieldStore + ", index=" + fieldAnalyzed + "]";
    }
}
四,分页缓存类
[java] view
plaincopy
![](https://code.csdn.net/assets/CODE_ico.png)
package com.hwt.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
/**
 * Paged search with a small in-memory cache of documents around the requested page.
 *
 * <p>A search loads up to {@code cacheSize} documents starting two pages before the
 * requested page (when possible); later requests for pages inside that window are
 * answered from the cache without touching the index.
 *
 * <p>NOTE(review): the cache is keyed by page number only. Calling {@link #search}
 * with a different query or sort while the cache is filled returns documents of the
 * previous query; call {@link #refleshCache()} before switching queries.
 */
public class CachePage {
    private static final Logger LOGGER = Logger.getLogger(CachePage.class);
    private int pageStart = 1; // 1-based number of the first page held in the cache
    private int pageSize = 15; // number of documents per page
    private int pageNum = 0; // total number of pages found by the last search
    private int totalNum = 0; // total number of hits found by the last search
    private int cacheSize = 100; // maximum number of cached documents
    private List<Document> cacheList = new ArrayList<Document>(); // cached documents, in result order

    /**
     * Creates a pager.
     *
     * @param pageSize  documents per page; {@code null} or non-positive keeps the default (15)
     * @param cacheSize maximum number of cached documents; {@code null} or non-positive
     *                  keeps the default (100)
     */
    public CachePage(Integer pageSize, Integer cacheSize) {
        if (pageSize != null && pageSize > 0) {
            this.pageSize = pageSize;
        }
        if (cacheSize != null && cacheSize > 0) {
            this.cacheSize = cacheSize;
        }
    }

    /**
     * Tells whether a page can be served from the cache.
     *
     * @param page 1-based page number; values below 1 are treated as 1
     * @return {@code true} if every document of that page is cached
     */
    public boolean inCache(int page) {
        int cacheNum = cacheList.size();
        if (cacheNum == 0) {
            return false;
        }
        if (page <= 0) {
            page = 1;
        }
        if (page < pageStart) {
            return false;
        }
        // long arithmetic: page numbers come from callers and may be arbitrarily large
        long offset = (long) (page - pageStart) * pageSize;
        if (offset >= cacheNum) {
            return false;
        }
        // A page that is only partly cached still counts when the cache already
        // contains the very last hit, i.e. the page is the short final page.
        boolean wholePageCached = offset + pageSize <= cacheNum;
        boolean cacheReachesLastHit = (long) (pageStart - 1) * pageSize + cacheNum >= totalNum;
        return wholePageCached || cacheReachesLastHit;
    }

    /**
     * Empties the cache.
     */
    public void refleshCache() {
        cacheList.clear();
    }

    /**
     * Appends a document to the cache unless the cache is already full.
     *
     * @param doc the document to cache
     */
    public void addCache(Document doc) {
        if (this.cacheList.size() < cacheSize) {
            this.cacheList.add(doc);
        } else {
            LOGGER.info("缓存池已满");
        }
    }

    /**
     * Builds the result for a page from the cached documents.
     *
     * @param page 1-based page number
     * @return a map with the keys "currentPage", "totalNum", "pageNum" and "list"
     *         (a {@code List<Document>}); the list is empty when the page lies
     *         outside the cached window
     */
    @SuppressWarnings("rawtypes")
    public Map readCache(int page) {
        int cached = cacheList.size();
        long offset = (long) (page - pageStart) * pageSize;
        List<Document> cacheRs = new ArrayList<Document>();
        if (offset >= 0 && offset < cached) {
            int start = (int) offset;
            int end = (int) Math.min(offset + pageSize, (long) cached);
            cacheRs.addAll(cacheList.subList(start, end));
        }
        Map<String, Object> resultMap = new HashMap<String, Object>();
        resultMap.put("currentPage", page); // requested page
        resultMap.put("totalNum", totalNum); // total number of hits
        resultMap.put("pageNum", pageNum); // total number of pages
        resultMap.put("list", cacheRs);
        return resultMap;
    }

    /**
     * Returns one page of results, from the cache when possible and from the index
     * otherwise. A page beyond the last one is replaced by the last page.
     *
     * @param query the query to run
     * @param sort  the sort order
     * @param page  1-based page number; values below 1 are treated as 1
     * @return the result map described at {@link #readCache(int)}, or {@code null}
     *         if the index could not be opened or searched
     */
    @SuppressWarnings("rawtypes")
    public Map search(Query query, Sort sort, int page) {
        if (page <= 0) {
            page = 1;
        }
        if (inCache(page)) {
            return readCache(page);
        }
        IndexSearcher searcher = IndexUtils.getIndexSearcher();
        if (searcher == null) {
            return null;
        }
        try {
            // The cache window never starts after the requested page, so this many
            // hits always cover it; capped by the index size to keep the request sane.
            long wanted = (long) (page - 1) * pageSize + cacheSize;
            int querySize = (int) Math.min(wanted, (long) Math.max(1, searcher.maxDoc()));
            TopDocs topDocs = searcher.search(query, querySize, sort);
            int hits = topDocs.totalHits;
            int pages = hits % pageSize == 0 ? hits / pageSize : hits / pageSize + 1;
            if (page > pages) {
                page = pages;
            }
            if (page < 1) {
                // no hits at all: still report page 1 (with an empty list)
                page = 1;
            }
            ScoreDoc[] docs = topDocs.scoreDocs;

            // Start the window two pages before the requested one where possible.
            int startPage;
            if (page < 3) {
                startPage = 1;
            } else if (page > pages - 2) {
                startPage = Math.max(1, pages - 4);
            } else {
                startPage = page - 2;
            }
            // A cache too small to reach the requested page from there starts at the page itself.
            if ((long) (page - startPage + 1) * pageSize > cacheSize) {
                startPage = page;
            }

            refleshCache();
            int startSize = (startPage - 1) * pageSize;
            int endSize = Math.min(Math.min(startSize + cacheSize, hits), docs.length);
            for (int i = startSize; i < endSize; i++) {
                addCache(searcher.doc(docs[i].doc));
            }
            this.pageNum = pages;
            this.totalNum = hits;
            this.pageStart = startPage;
            return readCache(page);
        } catch (IOException e) {
            LOGGER.error("search failed", e);
            return null;
        } finally {
            closeSearcher(searcher);
        }
    }

    /**
     * Closes the searcher and the reader it was built on; the cached documents were
     * already loaded, so they stay usable.
     */
    private static void closeSearcher(IndexSearcher searcher) {
        try {
            searcher.close();
            // A searcher built from a reader does not close that reader itself.
            searcher.getIndexReader().close();
        } catch (IOException e) {
            LOGGER.error("could not close the index searcher", e);
        }
    }

    public Integer getPageSize() {
        return pageSize;
    }

    /**
     * Changes the page size and empties the cache, whose page boundaries would no
     * longer match. {@code null} and non-positive values are ignored.
     */
    public void setPageSize(Integer pageSize) {
        if (pageSize != null && pageSize > 0) {
            this.pageSize = pageSize;
            refleshCache();
        }
    }

    public Integer getPageStart() {
        return pageStart;
    }

    /** Sets the number of the first cached page; {@code null} is ignored. */
    public void setPageStart(Integer pageStart) {
        if (pageStart != null) {
            this.pageStart = pageStart;
        }
    }

    public Integer getCacheSize() {
        return cacheSize;
    }

    /** Sets the cache capacity; {@code null} and non-positive values are ignored. */
    public void setCacheSize(Integer cacheSize) {
        if (cacheSize != null && cacheSize > 0) {
            this.cacheSize = cacheSize;
        }
    }

    public List<Document> getCacheList() {
        return cacheList;
    }

    /** Replaces the cached documents; {@code null} empties the cache. */
    public void setCacheList(List<Document> cacheList) {
        this.cacheList = cacheList != null ? cacheList : new ArrayList<Document>();
    }
}
测试类:
[java] view
plaincopy
![](https://code.csdn.net/assets/CODE_ico.png)
package test;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.print.Doc;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import com.hwt.lucene.index.CachePage;
import com.hwt.lucene.index.IndexField;
import com.hwt.lucene.index.IndexUtils;
/**
 * Manual driver for the Lucene helper classes.
 *
 * <p>It prepares a sample set of index fields, builds a boolean query over the
 * {@code title} and {@code content} fields, and prints two result pages fetched
 * through {@link CachePage}. The index-maintenance calls are left commented out
 * so that running the driver does not modify an existing index.
 */
public class Test {

    /**
     * Runs the demo: builds the query, fetches page 1, refreshes the cache and
     * fetches page 4, printing each page to standard output.
     *
     * @param args unused
     * @throws IOException declared for the index access performed by the helper classes
     * @throws ParseException if the query text cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        // Sample document. It is only consumed by the commented-out index
        // operations below; enable one of them to write it to the index.
        List<IndexField> fieldIndexs2 = new ArrayList<IndexField>();
        fieldIndexs2.add(newField("title", "美国攻打伊朗", Store.YES, Index.ANALYZED));
        fieldIndexs2.add(newField("content", "美国派兵3333,航母出发了,中国航公出发", Store.YES, Index.ANALYZED));
        fieldIndexs2.add(newField("Id", "12", Store.YES, Index.NOT_ANALYZED));

        // Create the index:
        // IndexUtils.createIndex(fieldIndexs2);
        // Delete documents matching a term:
        // IndexUtils.delete(new Term("Id","2"));
        // Update documents matching a term:
        // IndexUtils.updateIndex(fieldIndexs2, new Term("Id","2"));

        // The title clause is required; the content clause is optional.
        Analyzer analyzer = IndexUtils.getAnalyzer();
        QueryParser titleParser = new QueryParser(Version.LUCENE_36, "title", analyzer);
        QueryParser contentParser = new QueryParser(Version.LUCENE_36, "content", analyzer);
        BooleanQuery query = new BooleanQuery();
        query.add(titleParser.parse("美国"), Occur.MUST);
        query.add(contentParser.parse("美国"), Occur.SHOULD);

        // SortField arguments: field name, field type, reverse flag
        // (false, the default, sorts ascending).
        Sort sort = new Sort(new SortField("Id", SortField.INT, true));
        // Sorting on several fields:
        // Sort sort = new Sort(new SortField[]{new SortField("Id",SortField.STRING, true),
        //         new SortField("title",SortField.STRING, true)});

        // To query the index directly instead of going through CachePage, obtain
        // a searcher from IndexUtils.getIndexSearcher() and call
        // searcher.search(query, 100, sort). TopDocs.totalHits is then the total
        // number of matches, independent of the 100-record limit.

        CachePage cachePage = new CachePage(1, 100);
        printPage(cachePage, cachePage.search(query, sort, 1));

        System.out.println("+++++++++++++++++++");
        cachePage.refleshCache();

        // Request page 4 after the cache has been refreshed.
        printPage(cachePage, cachePage.search(query, sort, 4));

        // Remove every document from the index:
        // IndexUtils.deleteAll();
    }

    /** Creates an {@link IndexField} with the given name, value, store mode and index mode. */
    private static IndexField newField(String name, String value, Store store, Index index) {
        IndexField field = new IndexField();
        field.setFieldName(name);
        field.setFieldValue(value);
        field.setFieldStore(store);
        field.setFieldAnalyzed(index);
        return field;
    }

    /**
     * Prints the start page, the page count, the total hit count and the
     * {@code Id}, {@code title} and {@code content} of every document on the page.
     *
     * @param cachePage the pager that produced {@code page}; supplies the start page
     * @param page the result map, read through the keys {@code "pageNum"},
     *             {@code "totalNum"} and {@code "list"}; may be {@code null}
     */
    private static void printPage(CachePage cachePage, Map<?, ?> page) {
        // NOTE(review): a CachePage method returns null after an IOException;
        // presumably that is search() - confirm. Guard so that a failed search
        // is reported instead of ending in a NullPointerException.
        if (page == null) {
            System.err.println("search returned no result");
            return;
        }
        System.out.println("起始页:"+ cachePage.getPageStart());
        System.out.println("总页数:"+page.get("pageNum"));
        System.out.println("总条数:"+page.get("totalNum"));
        // The map values are untyped; the "list" entry is expected to hold the documents.
        @SuppressWarnings("unchecked")
        List<Document> docs = (List<Document>) page.get("list");
        for (Document document : docs) {
            System.out.println(document.get("Id"));
            System.out.println(document.get("title"));
            System.out.println(document.get("content"));
        }
    }
}
相关文章推荐
- 修改默认Netbeans JDK路径及JDK简介
- Spring + JDBC联合开发
- 要点Java13 继承Inheritance
- 字符串逆转(递归和非递归java)
- 第一个Struts2程序
- Java的23种设计模式
- eclipse快捷键
- Spring框架的IoC和AOP
- eclipse插件推荐 ftl properties
- Java WEB开发实战 之 第六部分:Taglib基本知识和基本开发
- Attach android source code in eclipse
- 在Netbeans上配置Android开发环境~
- Eclipse编辑器基本设置
- android logcat显示eclipse
- ExtJs读取服务器端数据 spring+springJDBC+struts2+extjs技术
- eclipse配置 android
- java Map
- java中常用的字符串的截取方法
- 2011-12-22 22:45 java中String s="abc"及String s=new String("abc")详解
- Java8 Lambda表达式教程