您的位置：首页 > 其它

全文检索引擎工具包Lucene初体验

2008-01-28 17:47 267 查看

Lucene是Apache软件基金会Jakarta项目组的一个子项目，是一个开放源代码的全文检索引擎工具包及架构，提供了完整的查询引擎和索引引擎，实现了一些通用的分词算法，并预留了很多词法分析器接口。

下载地址：http://lucene.apache.org/ （本文使用撰写时最新的 Lucene 2.3）

A.创建索引文件

package demo;

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.index.IndexWriter;

public class CreateDataBase ...{

public static void main(String[] args) ...{

CreateDataBase temp = new CreateDataBase();

if (temp.createDataBase("C:/temp") == 1) ...{

System.out.println("db init succ");

}

public CreateDataBase() ...{

}

public int createDataBase(File file) ...{

int returnValue = 0;

if (!file.isDirectory()) ...{

file.mkdirs();

}

try ...{

IndexWriter indexWriter = new IndexWriter(file,

new StandardAnalyzer(), true);

indexWriter.close();

returnValue = 1;

}

catch (Exception ex) ...{

ex.printStackTrace();

}

return returnValue;

}

public int createDataBase(String file) ...{

return this.createDataBase(new File(file));

}

}

B.添加记录：

1.先新建两个文本文件，文件名分别为ugg1.txt和ugg2.txt。内容随意，譬如：
ugg1.txt：
good
book
luky

ugg2.txt：
look
book
meet

并运行如下代码：

package demo;

import java.io.File;

import java.io.FileReader;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.index.IndexWriter;

public class InsertRecords ...{

public static void main(String[] args) ...{

InsertRecords temp = new InsertRecords();

String dbpath = "C:/temp";

// holen1.txt中包含关键字"holen"和"java"

if (temp.insertRecords(dbpath, "C:/temp/ugg1.txt") == 1) ...{

System.out.println("add file1 succ");

}

// holen2.txt中包含关键字"holen"和"chen"

if (temp.insertRecords(dbpath, "C:/temp/ugg2.txt") == 1) ...{

System.out.println("add file2 succ");

}

public InsertRecords() ...{

}

public int insertRecords(String dbpath, File file) ...{

int returnValue = 0;

try ...{

IndexWriter indexWriter = new IndexWriter(dbpath,

new StandardAnalyzer(), false);

this.addFiles(indexWriter, file);

returnValue = 1;

} catch (Exception ex) ...{

ex.printStackTrace();

}

return returnValue;

}

public int insertRecords(String dbpath, String file) ...{

return this.insertRecords(dbpath, new File(file));

}

public void addFiles(IndexWriter indexWriter, File file) ...{

Document doc = new Document();

try ...{

doc.add(new Field("filename", file.getName(), Field.Store.YES,

Field.Index.UN_TOKENIZED));

// 以下两句只能取一句,前者是索引不存储,后者是索引且存储

doc.add(new Field("contents", new FileReader(file)));

indexWriter.addDocument(doc);

indexWriter.close();

} catch (Exception ex) ...{

ex.printStackTrace();

}

}

C.进行查询

package demo;

import java.util.ArrayList;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.Hits;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.Searcher;

public class QueryRecords ...{

public QueryRecords() ...{

}

public ArrayList queryRecords(String searchkey, String dbpath,

String searchfield) ...{

ArrayList list = null;

try ...{

Searcher searcher = new IndexSearcher(dbpath);

QueryParser parser = new QueryParser(searchfield,

new StandardAnalyzer());

Query query = parser.parse(searchkey);

Hits hits = searcher.search(query);

if (hits != null) ...{

list = new ArrayList();

int temp_hitslength = hits.length();

Document doc = null;

for (int i = 0; i < temp_hitslength; i++) ...{

doc = hits.doc(i);

list.add(doc.get("filename"));

}

} catch (Exception ex) ...{

ex.printStackTrace();

}

return list;

}

public static void main(String[] args) ...{

QueryRecords temp = new QueryRecords();

ArrayList list = null;

list = temp.queryRecords("look", "c:/temp", "contents");

for (int i = 0; i < list.size(); i++) ...{

System.out.println((String) list.get(i));

}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航