您的位置:首页 > 编程语言 > Java开发

lucene + spring

2014-03-20 13:54 232 查看
目录结构



一,lucene的索引工具类

[java] view
plaincopy





package com.hwt.lucene.index;

import java.io.File;

import java.io.IOException;

import java.util.List;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.index.CorruptIndexException;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.index.Term;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.store.LockObtainFailedException;

import org.apache.lucene.util.Version;

/**
 * Static helpers for opening, writing to and maintaining the Lucene index
 * stored under {@code WebRoot/lucene/index}.
 *
 * <p>Each write helper opens its own {@link IndexWriter} and closes it before
 * returning. I/O failures are printed to stderr and otherwise swallowed; the
 * factory methods return {@code null} in that case, and the write helpers
 * become no-ops when no writer could be opened.
 *
 * @author 黄文韬
 */
public class IndexUtils {

    // Single shared Paoding analyzer, used for indexing and by callers that parse queries.
    private static final Analyzer ANALYZER = new PaodingAnalyzer();

    // Location of the index; a relative path, resolved by java.io.File.
    private static final String indexPath = "WebRoot/lucene/index";

    /**
     * Returns the shared Paoding analyzer.
     *
     * @return the analyzer instance used by every method of this class
     */
    public static Analyzer getAnalyzer() {
        return ANALYZER;
    }

    /**
     * Opens a file-system directory for the given path.
     *
     * @param path path of the index directory (may be relative)
     * @return the opened directory, or {@code null} if it could not be opened
     */
    public static Directory getDirectory(String path) {
        try {
            return FSDirectory.open(new File(path));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Opens a reader on the index.
     *
     * <p>The caller owns the returned reader and must close it.
     *
     * @return a new reader, or {@code null} if the index could not be opened
     */
    public static IndexReader getIndexReader() {
        Directory directory = getDirectory(indexPath);
        if (directory == null) {
            // getDirectory already reported the failure; do not hand null to Lucene.
            return null;
        }
        try {
            return IndexReader.open(directory);
        } catch (IOException e) {
            // Also covers CorruptIndexException, which is an IOException.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Opens a writer on the index.
     *
     * <p>The caller owns the returned writer and must close it.
     *
     * @return a new writer, or {@code null} if the index could not be opened
     *         or its write lock could not be obtained
     */
    public static IndexWriter getIndexWriter() {
        Directory directory = getDirectory(indexPath);
        if (directory == null) {
            return null;
        }
        try {
            return new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_36, ANALYZER));
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Creates a searcher backed by a freshly opened reader.
     *
     * <p>The caller should close both the searcher and the reader obtained
     * from {@code getIndexReader()} on it when done.
     *
     * @return a new searcher, or {@code null} if the index could not be opened
     */
    public static IndexSearcher getIndexSearcher() {
        IndexReader reader = getIndexReader();
        if (reader == null) {
            return null;
        }
        try {
            return new IndexSearcher(reader);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Adds one document, built from the given fields, to the index.
     *
     * @param result descriptions of the fields that make up the document
     */
    public static void createIndex(List<IndexField> result) {
        IndexWriter indexWriter = getIndexWriter();
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.addDocument(toDocument(result));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(indexWriter);
        }
    }

    /**
     * Merges the index down to a single segment.
     */
    public static void mergeIndex() {
        IndexWriter indexWriter = getIndexWriter();
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.forceMerge(1);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(indexWriter);
        }
    }

    /**
     * Replaces every document matching {@code term} with a single new
     * document built from {@code fields}, then merges the index down to one
     * segment.
     *
     * @param fields descriptions of the fields of the replacement document
     * @param term   term selecting the documents to replace
     */
    public static void updateIndex(List<IndexField> fields, Term term) {
        IndexWriter indexWriter = getIndexWriter();
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.updateDocument(term, toDocument(fields), ANALYZER);
            indexWriter.forceMerge(1);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(indexWriter);
        }
    }

    /**
     * Deletes every document in the index.
     */
    public static void deleteAll() {
        IndexWriter writer = getIndexWriter();
        if (writer == null) {
            return;
        }
        try {
            writer.deleteAll();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(writer);
        }
    }

    /**
     * Deletes every document matching {@code term}, then merges the index
     * down to one segment.
     *
     * @param term term selecting the documents to delete
     */
    public static void delete(Term term) {
        // No reader is opened here: the deletion only needs the writer, and
        // an extra reader would never be closed.
        IndexWriter writer = getIndexWriter();
        if (writer == null) {
            return;
        }
        try {
            writer.deleteDocuments(term);
            writer.forceMerge(1);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(writer);
        }
    }

    /**
     * Builds a document with one Lucene field per entry of {@code fields},
     * using each entry's own store and index options.
     */
    private static Document toDocument(List<IndexField> fields) {
        Document doc = new Document();
        for (IndexField field : fields) {
            doc.add(new Field(field.getFieldName(), field.getFieldValue(),
                    field.getFieldStore(), field.getFieldAnalyzed()));
        }
        return doc;
    }

    /** Closes the writer, printing (not propagating) any I/O failure. */
    private static void closeWriter(IndexWriter writer) {
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

二,文件类型的搜索

[java] view
plaincopy





package com.hwt.lucene.index;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.IOException;

import java.io.InputStreamReader;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.Field.Index;

import org.apache.lucene.document.Field.Store;

/**
 * Converts files, or plain name/content pairs, into Lucene documents with
 * the two fields {@code name} and {@code content}, both stored and analyzed.
 *
 * @author 黄文韬
 */
public class FileDocument {

    /**
     * Converts a file into a document.
     *
     * @param file the file to index; its content is read as UTF-8
     * @return a document holding the file name and the file content
     */
    public Document fileToDocument(File file) {
        Document document = new Document();
        document.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        document.add(new Field("content", this.readFileRetStr(file), Store.YES, Index.ANALYZED));
        return document;
    }

    /**
     * Converts a name and a content string into a document.
     *
     * @param name    value of the {@code name} field
     * @param content value of the {@code content} field
     * @return a document holding both values
     */
    public Document stringToDocumet(String name, String content) {
        Document document = new Document();
        document.add(new Field("name", name, Store.YES, Index.ANALYZED));
        document.add(new Field("content", content, Store.YES, Index.ANALYZED));
        return document;
    }

    /**
     * Reads a whole file as UTF-8 text.
     *
     * <p>Lines are joined with {@code '\n'} so that the last word of one line
     * is not glued to the first word of the next. I/O failures are printed to
     * stderr; whatever was read up to that point (possibly the empty string)
     * is returned.
     *
     * @param file the file to read
     * @return the text of the file
     */
    public String readFileRetStr(File file) {
        StringBuilder text = new StringBuilder();
        FileInputStream fStream = null;
        try {
            fStream = new FileInputStream(file);
            BufferedReader bReader = new BufferedReader(new InputStreamReader(fStream, "UTF-8"));
            String line = bReader.readLine();
            while (line != null) {
                text.append(line);
                line = bReader.readLine();
                if (line != null) {
                    // readLine strips the terminator; put a separator back between lines.
                    text.append('\n');
                }
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException, which is an IOException.
            e.printStackTrace();
        } finally {
            // fStream is still null when the file could not be opened.
            if (fStream != null) {
                try {
                    fStream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return text.toString();
    }
}

三,封装索引字段类

[java] view
plaincopy





package com.hwt.lucene.index;

import java.io.Serializable;

import org.apache.lucene.document.Field.Index;

import org.apache.lucene.document.Field.Store;

/**
 * Mutable description of one Lucene field: its name, its text value and the
 * store/index options to create it with.
 *
 * @author hwt
 */
public class IndexField implements Serializable {

    /** Name of the field inside the document. */
    private String fieldName;

    /** Text value of the field. */
    private String fieldValue;

    /** Store option, e.g. {@code Store.YES} or {@code Store.NO}. */
    private Store fieldStore;

    /**
     * Index option, e.g. {@code Index.ANALYZED}, {@code Index.NOT_ANALYZED},
     * {@code Index.NO} or {@code Index.ANALYZED_NO_NORMS}.
     */
    private Index fieldAnalyzed;

    /** @return the field name */
    public String getFieldName() {
        return this.fieldName;
    }

    /** @param fieldName the field name */
    public void setFieldName(String fieldName) {
        this.fieldName = fieldName;
    }

    /** @return the field value */
    public String getFieldValue() {
        return this.fieldValue;
    }

    /** @param fieldValue the field value */
    public void setFieldValue(String fieldValue) {
        this.fieldValue = fieldValue;
    }

    /** @return the store option */
    public Store getFieldStore() {
        return this.fieldStore;
    }

    /** @param fieldStore the store option */
    public void setFieldStore(Store fieldStore) {
        this.fieldStore = fieldStore;
    }

    /** @return the index option */
    public Index getFieldAnalyzed() {
        return this.fieldAnalyzed;
    }

    /** @param fieldAnalyzed the index option */
    public void setFieldAnalyzed(Index fieldAnalyzed) {
        this.fieldAnalyzed = fieldAnalyzed;
    }
}

四,分页缓存类

[java] view
plaincopy





package com.hwt.lucene.index;

import java.io.IOException;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.List;

import java.util.Map;

import org.apache.log4j.Logger;

import org.apache.lucene.document.Document;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.Sort;

import org.apache.lucene.search.TopDocs;

/**
 * Paged search over the index with a small window of cached documents.
 *
 * <p>A search that misses the cache runs the query, then caches up to
 * {@code cacheSize} hits starting a couple of pages before the requested
 * page; later requests for pages inside that window are answered from the
 * cache without touching the index. The cache is not keyed by query or sort,
 * so use one instance per query. Instances keep mutable state and perform no
 * synchronization of their own.
 */
public class CachePage {

    private static final Logger LOGGER = Logger.getLogger(CachePage.class);

    private int pageStart = 1;   // page number of the first cached document
    private int pageSize = 15;   // documents per page
    private int pageNum = 0;     // total number of pages of the last search
    private int totalNum = 0;    // total number of hits of the last search
    private int cacheSize = 100; // maximum number of cached documents
    private List<Document> cacheList = new ArrayList<Document>(); // cached documents

    /**
     * Creates a pager.
     *
     * @param pageSize  documents per page; {@code null} keeps the default of 15
     * @param cacheSize maximum number of cached documents; {@code null} keeps
     *                  the default of 100
     */
    public CachePage(Integer pageSize, Integer cacheSize) {
        if (pageSize != null) {
            this.pageSize = pageSize;
        }
        if (cacheSize != null) {
            this.cacheSize = cacheSize;
        }
    }

    /**
     * Tells whether a page can be served from the cache.
     *
     * <p>A page counts as cached when its first document is cached and either
     * the whole page is cached or the cache extends to the end of the result
     * set (so a short last page is still complete).
     *
     * @param page 1-based page number; values below 1 are treated as 1
     * @return {@code true} if {@link #readCache(int)} would return the
     *         complete page
     */
    public boolean inCache(int page) {
        int cached = cacheList.size();
        if (cached == 0) {
            return false;
        }
        if (page <= 0) {
            page = 1;
        }
        if (page < pageStart) {
            return false;
        }
        // long arithmetic: page numbers come from callers and may be large.
        long offset = (long) (page - pageStart) * pageSize;
        if (offset >= cached) {
            return false;
        }
        boolean wholePageCached = offset + pageSize <= cached;
        boolean cacheReachesEnd = (long) (pageStart - 1) * pageSize + cached >= totalNum;
        return wholePageCached || cacheReachesEnd;
    }

    /**
     * Empties the cache.
     */
    public void refleshCache() {
        cacheList.clear();
    }

    /**
     * Appends a document to the cache unless the cache is full.
     *
     * @param doc the document to cache
     */
    public void addCache(Document doc) {
        if (this.cacheList.size() < cacheSize) {
            this.cacheList.add(doc);
        } else {
            LOGGER.info("缓存池已满");
        }
    }

    /**
     * Builds the result for a page from the cached documents.
     *
     * @param page 1-based page number; values below 1 are treated as 1
     * @return a map with the keys {@code currentPage}, {@code totalNum},
     *         {@code pageNum} and {@code list}; {@code list} is empty when the
     *         page lies outside the cached window
     */
    public Map readCache(int page) {
        if (page <= 0) {
            page = 1;
        }
        List<Document> pageDocs = new ArrayList<Document>();
        if (page >= pageStart) {
            long start = (long) (page - pageStart) * pageSize;
            long end = Math.min(start + pageSize, (long) cacheList.size());
            for (long i = start; i < end; i++) {
                pageDocs.add(cacheList.get((int) i));
            }
        }

        Map<String, Object> resultMap = new HashMap<String, Object>();
        resultMap.put("currentPage", page);  // current page
        resultMap.put("totalNum", totalNum); // total number of hits
        resultMap.put("pageNum", pageNum);   // total number of pages
        resultMap.put("list", pageDocs);
        return resultMap;
    }

    /**
     * Returns one page of results, from the cache when possible.
     *
     * @param query the query to run
     * @param sort  the sort order; {@code null} sorts by relevance
     * @param page  1-based page number; values below 1 are treated as 1 and
     *              values past the last page as the last page
     * @return the page in the format of {@link #readCache(int)}, or
     *         {@code null} if the index could not be opened or read
     */
    public Map search(Query query, Sort sort, int page) {
        if (page <= 0) {
            page = 1;
        }
        if (inCache(page)) {
            return readCache(page);
        }

        IndexSearcher searcher = IndexUtils.getIndexSearcher();
        if (searcher == null) {
            LOGGER.error("Index searcher could not be opened");
            return null;
        }
        try {
            // Enough hits to fill the cache window, which starts at or before
            // the requested page. Lucene caps this at the index size.
            int querySize = (int) Math.min((long) page * pageSize + cacheSize,
                    (long) Integer.MAX_VALUE);
            TopDocs topDocs = sort == null
                    ? searcher.search(query, querySize)
                    : searcher.search(query, querySize, sort);

            int hits = topDocs.totalHits;
            int pages = hits / pageSize + (hits % pageSize == 0 ? 0 : 1);
            if (page > pages) {
                // Never below 1, so an empty result set yields an empty first page.
                page = Math.max(pages, 1);
            }

            // Start the cached window two pages before the requested page,
            // or four pages before the last page when close to the end.
            int startPage;
            if (page < 3) {
                startPage = 1;
            } else if (page > pages - 2) {
                startPage = Math.max(pages - 4, 1);
            } else {
                startPage = page - 2;
            }

            ScoreDoc[] docs = topDocs.scoreDocs;
            int startSize = (startPage - 1) * pageSize;
            int endSize = Math.min(Math.min(startSize + cacheSize, hits), docs.length);

            // Load everything first so that an I/O failure leaves the old
            // cache and its bookkeeping untouched.
            List<Document> loaded = new ArrayList<Document>();
            for (int i = startSize; i < endSize; i++) {
                loaded.add(searcher.doc(docs[i].doc));
            }

            refleshCache();
            for (Document doc : loaded) {
                addCache(doc);
            }
            this.pageNum = pages;
            this.totalNum = hits;
            this.pageStart = startPage;
            return readCache(page);
        } catch (IOException e) {
            LOGGER.error("Search failed", e);
            return null;
        } finally {
            closeSearcher(searcher);
        }
    }

    /**
     * Closes a searcher and the reader behind it, logging any failure.
     * A searcher built from a reader does not close that reader itself.
     */
    private static void closeSearcher(IndexSearcher searcher) {
        try {
            searcher.close();
        } catch (IOException e) {
            LOGGER.warn("Failed to close index searcher", e);
        }
        try {
            searcher.getIndexReader().close();
        } catch (IOException e) {
            LOGGER.warn("Failed to close index reader", e);
        }
    }

    /** @return documents per page */
    public Integer getPageSize() {
        return pageSize;
    }

    /** @param pageSize documents per page */
    public void setPageSize(Integer pageSize) {
        this.pageSize = pageSize;
    }

    /** @return page number of the first cached document */
    public Integer getPageStart() {
        return pageStart;
    }

    /** @param pageStart page number of the first cached document */
    public void setPageStart(Integer pageStart) {
        this.pageStart = pageStart;
    }

    /** @return maximum number of cached documents */
    public Integer getCacheSize() {
        return cacheSize;
    }

    /** @param cacheSize maximum number of cached documents */
    public void setCacheSize(Integer cacheSize) {
        this.cacheSize = cacheSize;
    }

    /** @return the live list of cached documents */
    public List<Document> getCacheList() {
        return cacheList;
    }

    /** @param cacheList the list to use as cache from now on */
    public void setCacheList(List<Document> cacheList) {
        this.cacheList = cacheList;
    }
}

测试类:

[java] view
plaincopy





package test;

import java.io.File;

import java.io.IOException;

import java.util.ArrayList;

import java.util.List;

import java.util.Map;

import javax.print.Doc;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.cjk.CJKAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field.Index;

import org.apache.lucene.document.Field.Store;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.index.Term;

import org.apache.lucene.queryParser.ParseException;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.BooleanClause.Occur;

import org.apache.lucene.search.BooleanQuery;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.Searcher;

import org.apache.lucene.search.Sort;

import org.apache.lucene.search.SortField;

import org.apache.lucene.search.TermQuery;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.Version;

import org.springframework.context.ApplicationContext;

import org.springframework.context.support.ClassPathXmlApplicationContext;

import com.hwt.lucene.index.CachePage;

import com.hwt.lucene.index.IndexField;

import com.hwt.lucene.index.IndexUtils;

/**
 * Manual demo: builds a sample set of index fields, then runs the same
 * two-field query through {@link CachePage} for page 1 and page 4 and prints
 * what comes back.
 */
public class Test {

    public static void main(String[] args) throws IOException, ParseException {
        // Sample document, used by the disabled index-maintenance calls below.
        List<IndexField> fieldIndexs2 = new ArrayList<IndexField>();
        fieldIndexs2.add(newField("title", "美国攻打伊朗", Store.YES, Index.ANALYZED));
        fieldIndexs2.add(newField("content", "美国派兵3333,航母出发了,中国航公出发",
                Store.YES, Index.ANALYZED));
        fieldIndexs2.add(newField("Id", "12", Store.YES, Index.NOT_ANALYZED));

        // Index maintenance, left disabled so that running this class only searches:
        // IndexUtils.createIndex(fieldIndexs2);                        // add the document
        // IndexUtils.delete(new Term("Id", "2"));                      // delete by term
        // IndexUtils.updateIndex(fieldIndexs2, new Term("Id", "2"));   // replace by term
        // IndexUtils.deleteAll();                                      // wipe the index

        // Title must match; a content match is optional and only affects scoring.
        Analyzer analyzer = IndexUtils.getAnalyzer();
        QueryParser titleParser = new QueryParser(Version.LUCENE_36, "title", analyzer);
        QueryParser contentParser = new QueryParser(Version.LUCENE_36, "content", analyzer);
        BooleanQuery query = new BooleanQuery();
        query.add(titleParser.parse("美国"), Occur.MUST);
        query.add(contentParser.parse("美国"), Occur.SHOULD);

        // SortField arguments: field name, field type, reverse (false = ascending).
        Sort sort = new Sort(new SortField("Id", SortField.INT, true));
        // Sorting on several fields:
        // Sort sort = new Sort(new SortField[] {new SortField("Id", SortField.STRING, true),
        //         new SortField("title", SortField.STRING, true)});

        // No IndexSearcher is opened here: CachePage obtains its own.
        CachePage cachePage = new CachePage(1, 100);

        printPage(cachePage, cachePage.search(query, sort, 1));

        System.out.println("+++++++++++++++++++");

        // Drop the cache so that the second search goes back to the index.
        cachePage.refleshCache();
        printPage(cachePage, cachePage.search(query, sort, 4));
    }

    /** Creates a field description with the given name, value and options. */
    private static IndexField newField(String name, String value, Store store, Index index) {
        IndexField field = new IndexField();
        field.setFieldName(name);
        field.setFieldValue(value);
        field.setFieldStore(store);
        field.setFieldAnalyzed(index);
        return field;
    }

    /** Prints the paging summary and the documents of one search result. */
    @SuppressWarnings("unchecked") // CachePage stores a List<Document> under "list"
    private static void printPage(CachePage cachePage, Map page) {
        if (page == null) {
            // CachePage.search returns null when the index cannot be read.
            System.err.println("search failed");
            return;
        }
        System.out.println("起始页:" + cachePage.getPageStart());
        System.out.println("总页数:" + page.get("pageNum"));
        System.out.println("总条数:" + page.get("totalNum"));
        for (Document document : (List<Document>) page.get("list")) {
            System.out.println(document.get("Id"));
            System.out.println(document.get("title"));
            System.out.println(document.get("content"));
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: