Lucene 4.3 版本创建索引 demo
2016-06-22 10:57
447 查看
package com.shentong.search; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.util.Date; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.LongField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; /** * Lucene_solr_4_3版本 * * @author ywflqq * */ public class TxtFileIndexTest { public static void main(String[] args) { // indexDir is the directory that hosts Lucene's index files try { File indexDir = new File("E:\\数据\\luceneindex"); // dataDir is the directory that hosts the text files that to be // indexed File dataDir = new File("E:\\数据\\classifier"); Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_43); Directory indexStorePos = FSDirectory.open(indexDir); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, luceneAnalyzer); conf.setOpenMode(OpenMode.CREATE); IndexWriter indexWriter = new IndexWriter(indexStorePos, conf); Date start = new Date(); indexDocs(indexWriter, dataDir); indexWriter.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } } static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if 
(file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary(临时的) files raise this // exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't // tokenize // the field into separate words and don't index term // frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named // "modified". // Use a LongField that is indexed (i.e. efficiently // filterable with // NumericRangeFilter). This indexes to milli-second // resolution, which // is often too fine. You could instead create a number // based on // year/month/day/hour/minutes/seconds, down the resolution // you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". // Specify a Reader, // so that the text of the file is tokenized and indexed, // but not stored. // Note that FileReader expects the file to be in UTF-8 // encoding. // If that's not the case searching for special characters // will fail. 
doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old // document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have // been indexed) so // we use updateDocument instead to replace the old one // matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } } }
相关文章推荐
- C# System.IO命名空间常用的类
- 日常小结-XML
- HTML5 Web Workers 使用案例
- 将Java的List结构通过GSON库转换为JSON的方法示例
- 第三次阶段贡献分
- IOS用CGContextRef画各种图形(文字、圆、直线、弧线、矩形、扇形、椭圆、三角形、圆角矩形、贝塞尔曲线、图片)
- 在XML里的XSD和DTD以及standalone的使用2----具体使用详解
- 杂
- 报错:does not contain bitcode. You must rebuild it with bitcode enabled
- MySQL字符串函数详解(推荐)
- java中的抽象类和抽象方法
- javascript:history.go()和History.back()的区别及应用
- 报错 - profile: Permission denied
- 12年成都 E 贪心+KMP HDU 4468
- sqlsever 导入大数据sql文件
- MYSQL 缓存详解 [myownstars] 经典博客
- web开发规范文档
- 15款优秀移动APP产品原型设计工具
- 直角三角形 射影定理 欧几里德定理
- android开发常用框架