lucene 学习笔记
2012-04-10 10:41
337 查看
package com.test.lucene; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; import org.junit.Test; public class Main { private static final Version version = Version.LUCENE_35; private static final Analyzer analyzer = new StandardAnalyzer(version); private static final File indexDir = new File("E:/lucene/index"); private static final File dataDir = new File("E:/lucene/data"); /** * 建立索引 * * @throws Exception */ public void index() throws Exception { IndexWriter writer = getIndexWriter(); File[] files = dataDir.listFiles(); for (File file : files) { if (file.isDirectory()) {// 略过文件夹 continue; } Document doc = getDoc(file); writer.addDocument(doc); } writer.close(); } private IndexWriter getIndexWriter() { IndexWriterConfig iwc = null; IndexWriter writer = null; try { Directory dir = null; dir = FSDirectory.open(indexDir);// 索引文件保存在文件系统上, 存在io操作, 速度较慢 dir = new RAMDirectory();// 放在内存里, 速度快, 没有io操作, 但是程序一退出, 就没有了 // 可以结合以上两个优点:索引存放在文件系统上, 程序启动时, 把索引库读到内存, // 程序退出时, 把经过增删改的索引库保存会硬盘上 // 1.启动时读入 writer = new IndexWriter(dir, iwc); } catch (Exception e) { e.printStackTrace(); } return writer; } /** * 测试两种存放方式: * * <pre> * 1.启动时从filesystem加载索引到ram * 2.对ram中的索引进行增删改查 * 3.退出时保存:从ram到filesystem * </pre> */ @Test public void test() { IndexWriterConfig iwc1 = null; IndexWriterConfig iwc2 = null; IndexWriter fsWriter = null; IndexWriter ramWriter = null; // indexWriterConfig 不能用两次: the object cannot be set twice! iwc1 = new IndexWriterConfig(version, analyzer); iwc1.setOpenMode(OpenMode.CREATE_OR_APPEND);// ram中,添加文档,使用创建或追加 iwc2 = new IndexWriterConfig(version, analyzer); iwc2.setOpenMode(OpenMode.CREATE);// 因为是从ram中写入, ram中保存的是最新的, 所以直接创建 try { Directory fsDir = FSDirectory.open(indexDir); Directory ramDir = new RAMDirectory(fsDir);// 从systemfile加载 ramWriter = new IndexWriter(ramDir, iwc2);// 操作内存索引的writer ramWriter.addDocument(getDoc(new File("E:/lucene/data/test")));// 直接添加,方便点 ramWriter.commit(); ramWriter.close();// 关闭后才能把ram中最新的索引写回systemfile fsWriter = new IndexWriter(fsDir, iwc1);// 操作硬盘索引的writer fsWriter.addIndexes(ramDir); fsWriter.close(); } catch (Exception e) { e.printStackTrace(); } } private Document getDoc(File file) { Document doc = new Document(); /** * 网页搜索时有:url地址, 标题, 内容等, 而通常不需要通过url进行搜索, 但是url还是得存起来, * 这时需要用Field.Store.YES, Field.Index.NOT_ANALYZED <br/> * 索引 * * <pre> * +--+不索引 * +--+索引 * +---+分词 * +---+不分词 * </pre> */ Field name = new Field("name", file.getName(), Field.Store.YES, Field.Index.ANALYZED);// 索引 Field size = new Field("size", String.valueOf(file.length()), Field.Store.YES, Field.Index.NOT_ANALYZED); Field content = new Field("content", readFile(file), Field.Store.YES, Field.Index.ANALYZED); doc.add(name); doc.add(size); doc.add(content); return doc; } private String readFile(File file) { StringBuffer content = new StringBuffer(); String line = ""; BufferedReader reader = null; try { reader = new BufferedReader(new InputStreamReader( new FileInputStream(file))); while ((line = reader.readLine()) != null) { content.append(line).append("\n"); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return content.toString(); } private Query getQuery(String fieldName, String key) throws Exception {// 单个field中索引 QueryParser parser = new QueryParser(version, fieldName, analyzer); return parser.parse(key); } private Query getQuery(String[] fields, String key) throws Exception {// 多个field中检索 QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer); return parser.parse(key); } @Test public void search() throws Exception { IndexReader reader = IndexReader.open(FSDirectory.open(indexDir));// 存放在文件系统 // IndexReader reader = IndexReader.open(new RAMDirectory());// 存放在内存 IndexSearcher searcher = new IndexSearcher(reader); Query query = getQuery(new String[] { "name", "content" }, "test"); TopDocs hits = searcher.search(query, 100);// 100 是搜索最大记录数, 不是分页用的, 搞错了 int total = hits.totalHits; if (total > 0) { System.out.println("共找到" + total + "条记录"); } else { System.out.println("没有找到记录"); } ScoreDoc[] scoreDocs = hits.scoreDocs; int start = 0; int end = hits.totalHits; // for (ScoreDoc doc : hits.scoreDocs) {//这样不便分页 for (int i = start; i < end; i++) {// 可以分页 int sn = scoreDocs[i].doc;// 相当于获取主键, Document document = searcher.doc(sn);// 根据主键获取文档 print(document); } searcher.close(); } private void print(Document doc) { System.out .println("--------------------------------------------------"); System.out.println("name :" + doc.get("name")); System.out.println("size :" + doc.get("size")); System.out.println("content:\n" + doc.get("content")); } }
相关文章推荐
- Lucene学习笔记 (一)
- Lucene-学习笔记 (版本3,5VS 5.3)
- lucene学习笔记
- 【转载】Lucene学习笔记(七)
- 学习笔记之Lucene
- Lucene 学习笔记(一)——基本对象及结构
- lucene 学习笔记
- Lucene全文检索学习笔记
- Lucene学习笔记(1):Lucene的索引文件格式
- lucene compass 学习系列 笔记 一,纯lucene 构建搜索
- Lucene学习笔记-内存与文件索引的简单操作
- lucene3.6.0索引操作的学习笔记
- Lucene学习笔记(八)--完
- 【转载】Lucene学习笔记(八)--完
- Lucene学习笔记: 五,Lucene搜索过程解析
- Lucene 学习笔记 01 —— Lucene 的使用简介和开发步骤
- 我的lucene学习笔记
- Lucene 学习笔记(二)——搜索方式(一)
- lucene3.5学习笔记03--待续
- lucene 学习笔记