您的位置:首页 > 其它

lucene搜索简单的实例

2014-03-31 14:07 429 查看
最近项目要用到Lucene做全文检索,刚开始很迷茫,回去看了一下资料,自己写了一个简单的实例,希望能对一些初学者有所帮助,高手请绕道。

项目环境:

1.导入jar包

IKAnalyzer3.2.0Stable.jar

lucene-analyzers-3.0.1.jar

lucene-core-3.0.1.jar

lucene-highlighter-3.0.1.jar

lucene-memory-3.0.1.jar

2.写一个demo

a.创建一个实体

-- Recreate the `article` table from scratch; an existing table is dropped first.
DROP TABLE IF EXISTS `article`;

CREATE TABLE `article` (

-- Auto-incrementing integer primary key.
`id` int(11) NOT NULL AUTO_INCREMENT,

-- Optional title, at most 20 characters.
`title` varchar(20) DEFAULT NULL,

-- Optional body text, at most 5000 characters.
`content` varchar(5000) DEFAULT NULL,

PRIMARY KEY (`id`)

) ENGINE=InnoDB DEFAULT CHARSET=utf8;

// Entity with the same three properties as the `article` table: id, title and content.
public class Article {

// Article identifier.
private int id;

// Article title.
private String title;

// Article body text.
private String content;

................//getters, setters and toString omitted

}

b.创建一个提供公共方法的类:

// Holder for one page of query results together with a count.

@SuppressWarnings("rawtypes")

public class QueryResult {

// Result objects; the element type is not declared (raw List).
private List list;

// Count stored with the list; the callers in this article pass TopDocs.totalHits here.
private int count;

..............get/set省略

}

/**
 * Shared Lucene plumbing: the index directory, the analyzer, the single
 * {@link IndexWriter}, and helpers that convert between {@code Article}
 * and Lucene {@link Document}.
 *
 * <p>Everything is set up once in the static initializer from the classpath
 * resource {@code lucene.properties}, which must define {@code path} (the
 * index directory, e.g. {@code path=D:\\IindexSearch}).
 */
public class LuceneUtils {

    /** Directory in which the index is stored. */
    private static Directory directory;

    /** Analyzer used when indexing; also handed to callers for query parsing. */
    private static Analyzer analyzer;

    /** The writer created once when this class is initialized. */
    private static IndexWriter indexWriter;

    /** The searcher most recently opened by {@link #getIndexSearch()}. */
    private static IndexSearcher indexSearcher;

    static {
        try {
            Properties prop = loadProperties("lucene.properties");
            String path = prop.getProperty("path");
            if (path == null) {
                // Fail with a readable message instead of an NPE inside new File(null).
                throw new IllegalStateException("lucene.properties does not define \"path\"");
            }

            directory = FSDirectory.open(new File(path));
            analyzer = new StandardAnalyzer(Version.LUCENE_30);
            indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);

            // Close the writer when the JVM exits.
            Runtime.getRuntime().addShutdownHook(new Thread() {
                @Override
                public void run() {
                    try {
                        indexWriter.close();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            });
        } catch (Exception e) {
            // Kept from the original: initialization problems are reported, not rethrown.
            e.printStackTrace();
        }
    }

    /**
     * Loads a properties file from the classpath and always closes the stream.
     *
     * @param resourceName classpath-relative resource name
     * @return the loaded properties
     * @throws Exception if the resource is missing or cannot be read
     */
    private static Properties loadProperties(String resourceName) throws Exception {
        InputStream inStream = LuceneUtils.class.getClassLoader().getResourceAsStream(resourceName);
        if (inStream == null) {
            throw new IllegalStateException(resourceName + " was not found on the classpath");
        }
        try {
            Properties prop = new Properties();
            prop.load(inStream);
            return prop;
        } finally {
            inStream.close();
        }
    }

    /**
     * Converts an {@code Article} into a Lucene document.
     *
     * <p>The id is stored prefix-coded and not analyzed, so it can be matched
     * exactly by a {@code Term}; title and content are stored and analyzed.
     *
     * @param obj the object to convert; must be an {@code Article}
     * @return a document with the fields {@code id}, {@code title} and {@code content}
     * @throws ClassCastException if {@code obj} is not an {@code Article}
     */
    public static Document objectToDocument(Object obj) {
        Article article = (Article) obj;

        Document doc = new Document();
        String idstr = NumericUtils.intToPrefixCoded(article.getId());
        doc.add(new Field("id", idstr, Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
        doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
        return doc;
    }

    /**
     * Converts a Lucene document back into an {@code Article}.
     *
     * @param doc a document whose {@code id} field holds a prefix-coded int
     * @return a new {@code Article} populated from the stored fields
     */
    public static Object documentToObject(Document doc) {
        Article article = new Article();
        // The id was stored prefix-coded; decode it back to an int.
        int id = NumericUtils.prefixCodedToInt(doc.get("id"));
        article.setId(id);
        article.setTitle(doc.get("title"));
        article.setContent(doc.get("content"));
        return article;
    }

    /**
     * @return the writer created in the static initializer
     */
    public static IndexWriter getIndexWriter() {
        return indexWriter;
    }

    /**
     * Opens a new searcher over the index directory on every call.
     *
     * <p>Each call creates a fresh {@link IndexSearcher}; this class never
     * closes it, so the caller should close the returned searcher when done.
     *
     * @return a newly opened searcher
     * @throws RuntimeException wrapping any failure while opening the index
     */
    public static IndexSearcher getIndexSearch() {
        try {
            indexSearcher = new IndexSearcher(directory);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return indexSearcher;
    }

    /**
     * @return the directory that holds the index
     */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * @return the analyzer created in the static initializer
     */
    public static Analyzer getAnalyzer() {
        return analyzer;
    }

}

c: 创建增删改查方法

public class IndexDao {

/**
 * Adds one article to the index and commits immediately.
 *
 * @param article the article to index
 * @throws RuntimeException wrapping any failure raised while converting or writing
 */
public void save(Article article) {
    try {
        // Convert first, then hand the document to the shared writer.
        final Document converted = LuceneUtils.objectToDocument(article);
        final IndexWriter writer = LuceneUtils.getIndexWriter();
        writer.addDocument(converted);
        writer.commit();
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    }
}

/**
 * Removes from the index every document whose {@code id} term matches the
 * given article's id, then commits.
 *
 * @param article the article whose id identifies the documents to delete
 * @throws RuntimeException wrapping any failure raised by the writer
 */
public void delete(Article article) {
    // The id is indexed prefix-coded, so the term must use the same encoding.
    final Term idTerm = new Term("id", NumericUtils.intToPrefixCoded(article.getId()));
    try {
        final IndexWriter writer = LuceneUtils.getIndexWriter();
        writer.deleteDocuments(idTerm);
        writer.commit();
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    }
}

/**
 * Replaces the indexed document(s) carrying the article's id with a fresh
 * document built from the article, then commits.
 *
 * @param article the article holding the new field values
 * @throws RuntimeException wrapping any failure raised by the writer
 */
public void update(Article article) {
    // Term that selects the existing document(s) by prefix-coded id.
    final Term idTerm = new Term("id", NumericUtils.intToPrefixCoded(article.getId()));
    final Document replacement = LuceneUtils.objectToDocument(article);
    try {
        final IndexWriter writer = LuceneUtils.getIndexWriter();
        // updateDocument deletes the documents matching the term and adds the new one.
        writer.updateDocument(idTerm, replacement);
        writer.commit();
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    }
}

/**
 * Runs a paged full-text search over the {@code id}, {@code title} and
 * {@code content} fields, sorted by id ascending, and highlights matches
 * in the title.
 *
 * <p>Only hits {@code first .. first + max - 1} are returned. The searcher
 * opened for this call is closed before the method returns.
 *
 * @param queryString query in Lucene query-parser syntax
 * @param first zero-based index of the first hit to return
 * @param max maximum number of hits to return
 * @return the requested page plus the total number of hits
 * @throws RuntimeException wrapping any parse, search or highlight failure
 */
public QueryResult query(String queryString, int first, int max) {
    IndexSearcher indexSearcher = null;
    try {
        // Lucene returns the top N hits, so ask for everything up to the end of the page.
        int totail = first + max;

        // 1. Parse the query string against several fields.
        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30, new String[] { "id", "title", "content" },
                LuceneUtils.getAnalyzer());
        Query query = parser.parse(queryString);

        // 2. Execute the search, sorted by id ascending.
        indexSearcher = LuceneUtils.getIndexSearch();
        Sort sort = new Sort(new SortField("id", SortField.INT));
        TopDocs topDocs = indexSearcher.search(query, null, totail, sort);
        int count = topDocs.totalHits; // total number of matches, not just this page
        ScoreDoc[] scoreDoc = topDocs.scoreDocs;

        // Highlighter: wraps matches in red <font> tags, fragments of about 100 chars.
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.setTextFragmenter(new SimpleFragmenter(100));

        // 3. Collect only the requested page: start at 'first', not at 0.
        int endIndex = Math.min(totail, scoreDoc.length);
        List<Article> list = new ArrayList<Article>();
        for (int i = first; i < endIndex; i++) {
            Document doc = indexSearcher.doc(scoreDoc[i].doc);

            // getBestFragment returns null when the title has no match;
            // in that case the stored title is kept unchanged.
            String title = doc.get("title");
            if (title != null) {
                String text = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), "title", title);
                if (text != null) {
                    doc.getField("title").setValue(text);
                }
            }

            list.add((Article) LuceneUtils.documentToObject(doc));
        }

        return new QueryResult(list, count);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        // LuceneUtils.getIndexSearch() opens a new searcher per call; release it here.
        if (indexSearcher != null) {
            try {
                indexSearcher.close();
            } catch (Exception ignored) {
                // Best effort: a close failure must not mask the result or the original exception.
            }
        }
    }
}

d. 测试dao方法:

/** Indexes twenty articles with ids 0..19 that share the same title and content. */
@Test
public void testSave() {
    final String title = "Lucene搜索的方式";
    final String content = "全文检索是计算机程序通过扫描文章中的每一个词,对每一个词建立一个索引,指明该词在文章中出现的次数和位置。";

    // One Article instance is reused; only the id changes between saves.
    final Article sample = new Article();
    int id = 0;
    while (id < 20) {
        sample.setId(id);
        sample.setTitle(title);
        sample.setContent(content);
        indexDao.save(sample);
        id++;
    }
}

/** Deletes the indexed document(s) whose id is 1. */
@Test
public void testDelete() {
    // Only the id is set before the article is passed to delete.
    final Article target = new Article();
    target.setId(1);
    indexDao.delete(target);
}

/** Replaces the indexed article with id 1 by one carrying new content. */
@Test
public void testUpdate() {
    final Article replacement = new Article();
    replacement.setId(1);
    replacement.setTitle("Lucene搜索的方式");
    replacement.setContent("跟新索引库测试是否正确");
    indexDao.update(replacement);
}

@Test

@SuppressWarnings("unchecked")

public void testQuery() {

String queryString = "Lucene";

QueryResult queryResult = indexDao.searchAndOrderBy(queryString, 0, 10);

System.out.println("count---------->" + queryResult.getCount());

List<Article> list = (List<Article>)queryResult.getList();

for(Article article:list){

System.err.println("list--------->" + article.toString());

}

到此Lucene测试例子完成。以下是本人在使用过程中改造封装后制作的模板:由于增删改查中都要用到一些相同的代码,重复比较多,为此本人将重复的部分提取为公共方法,不能确定的部分则通过一个接口提供抽象方法;调用者需要封装哪些实体,实现该接口并重写其中的方法即可。此封装主要用到的是模板方法设计模式。

如下:

/**
 * Template-style helpers for index save/delete/update/query. The parts that
 * depend on the entity being indexed are supplied by the caller through
 * {@link CallBackQuery}.
 */
public class ComsLuceneUtils {

    /** Entity-specific conversions supplied by the caller. */
    public interface CallBackQuery {

        /**
         * Converts one hit and appends it to the page being built.
         *
         * @param doc the hit (fields may already contain highlight markup)
         * @param list the page being built; implementations add the converted object to it
         * @param count total number of hits for the query
         * @return a result object; {@link ComsLuceneUtils#query} ignores this return value
         * @throws Exception if the conversion fails
         */
        public abstract QueryResult documentToObject(Document doc, List list, int count) throws Exception;

        /**
         * Converts an entity into the document to index.
         *
         * @param obj the entity
         * @return the document to write to the index
         * @throws Exception if the conversion fails
         */
        public abstract Document objectToDocument(Object obj) throws Exception;

    }

    /**
     * Adds one article to the index and commits.
     *
     * @param article the article to index
     * @param callBack supplies the article-to-document conversion
     * @throws RuntimeException wrapping any conversion or write failure
     */
    public static void save(Article article, CallBackQuery callBack) {
        try {
            Document doc = callBack.objectToDocument(article);
            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            indexWriter.addDocument(doc);
            indexWriter.commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes every document whose {@code id} term matches the given id, then commits.
     *
     * @param id the id to delete; must not be null
     * @throws RuntimeException wrapping any write failure
     */
    public static void delete(Integer id) {
        // The id is indexed prefix-coded, so the term must use the same encoding.
        String idStr = NumericUtils.intToPrefixCoded(id);
        Term term = new Term("id", idStr);
        try {
            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            indexWriter.deleteDocuments(term);
            indexWriter.commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Replaces the document(s) carrying the article's id with a freshly
     * converted document, then commits.
     *
     * @param article the article holding the new values
     * @param callBack supplies the article-to-document conversion
     * @throws RuntimeException wrapping any conversion or write failure
     */
    public static void update(Article article, CallBackQuery callBack) {
        String idStr = NumericUtils.intToPrefixCoded(article.getId());
        Term term = new Term("id", idStr);
        try {
            Document doc = callBack.objectToDocument(article);
            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            // updateDocument deletes the documents matching the term and adds the new one.
            indexWriter.updateDocument(term, doc);
            indexWriter.commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Runs a paged search over the given fields, sorted by id ascending,
     * highlighting matches in each of those fields.
     *
     * <p>Only hits {@code first .. first + max - 1} are passed to the
     * callback. The searcher opened for this call is closed before returning.
     *
     * @param queryString query in Lucene query-parser syntax
     * @param first zero-based index of the first hit to return
     * @param max maximum number of hits to return
     * @param parameter names of the fields to search and highlight
     * @param callback converts each hit and appends it to the result list
     * @return the requested page plus the total number of hits
     * @throws RuntimeException wrapping any parse, search, highlight or callback failure
     */
    public static QueryResult query(String queryString, int first, int max, String[] parameter, CallBackQuery callback) {
        IndexSearcher indexSearcher = null;
        try {
            // Lucene returns the top N hits, so ask for everything up to the end of the page.
            int totail = first + max;

            // 1. Parse the query string against the requested fields.
            QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30, parameter, LuceneUtils.getAnalyzer());
            Query query = parser.parse(queryString);

            // 2. Execute the search, sorted by id ascending.
            indexSearcher = LuceneUtils.getIndexSearch();
            Sort sort = new Sort(new SortField("id", SortField.INT));
            TopDocs topDocs = indexSearcher.search(query, null, totail, sort);
            int count = topDocs.totalHits; // total number of matches, not just this page
            ScoreDoc[] scoreDoc = topDocs.scoreDocs;

            // Highlighter: wraps matches in red <font> tags, fragments of about 100 chars.
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Scorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.setTextFragmenter(new SimpleFragmenter(100));

            // 3. Collect only the requested page: start at 'first', not at 0.
            int endIndex = Math.min(totail, scoreDoc.length);
            List<Article> list = new ArrayList<Article>();
            for (int i = first; i < endIndex; i++) {
                Document doc = indexSearcher.doc(scoreDoc[i].doc);

                for (int j = 0; j < parameter.length; j++) {
                    // Skip fields not stored on this hit instead of passing null to the highlighter.
                    String stored = doc.get(parameter[j]);
                    if (stored == null) {
                        continue;
                    }
                    // getBestFragment returns null when the field has no match; keep the stored value then.
                    String text = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), parameter[j], stored);
                    if (text != null) {
                        doc.getField(parameter[j]).setValue(text);
                    }
                }

                // The callback converts the document and appends it to 'list'.
                callback.documentToObject(doc, list, count);
            }

            return new QueryResult(list, count);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // LuceneUtils.getIndexSearch() opens a new searcher per call; release it here.
            if (indexSearcher != null) {
                try {
                    indexSearcher.close();
                } catch (Exception ignored) {
                    // Best effort: a close failure must not mask the result or the original exception.
                }
            }
        }
    }

}

测试方法:

/**
 * Searches {@code id}, {@code title} and {@code content} for "Lucene" through
 * the template API and prints the first page of up to 20 articles.
 *
 * <p>Failures are allowed to propagate so that the test actually fails,
 * instead of being printed and swallowed.
 */
@Test
@SuppressWarnings("unchecked")
public void testQuery() {
    String queryString = "Lucene";
    String[] param = new String[]{"id", "title", "content"};

    QueryResult queryResult = ComsLuceneUtils.query(queryString, 0, 20, param, new cn.net.yixun.util.ComsLuceneUtils.CallBackQuery() {

        // Converts one hit into an Article and appends it to the page.
        public QueryResult documentToObject(Document doc, List list, int count) throws Exception {
            Article article = new Article();
            // The id is stored prefix-coded; decode it back to an int.
            int id = NumericUtils.prefixCodedToInt(doc.get("id"));
            article.setId(id);
            article.setTitle(doc.get("title"));
            article.setContent(doc.get("content"));
            list.add(article);
            QueryResult queryResult = new QueryResult(list, count);
            return queryResult;
        }

        // Not needed for querying.
        public Document objectToDocument(Object obj) throws Exception {
            return null;
        }
    });

    List<Article> list = (List<Article>) queryResult.getList();
    for (Article article : list) {
        System.err.println("list--------->" + article.toString());
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: