您的位置:首页 > 其它

lucene_43版本创建索引demo

2016-06-22 10:57 447 查看
package com.shentong.search;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Demo for Lucene 4.3: builds a fresh index from every readable file found
 * under a data directory (searched recursively).
 *
 * <p>Usage: {@code TxtFileIndexTest [indexDir [dataDir]]}. When an argument is
 * omitted the corresponding built-in default path is used.
 *
 * @author ywflqq
 */
public class TxtFileIndexTest {

    /** Directory that hosts Lucene's index files when no argument is given. */
    private static final String DEFAULT_INDEX_DIR = "E:\\数据\\luceneindex";

    /** Directory that hosts the text files to be indexed when no argument is given. */
    private static final String DEFAULT_DATA_DIR = "E:\\数据\\classifier";

    /**
     * Creates (overwriting any existing) index and fills it with the files
     * below the data directory, then prints the elapsed time.
     *
     * @param args optional: {@code args[0]} is the index directory,
     *             {@code args[1]} is the data directory
     */
    public static void main(String[] args) {
        int argCount = (args == null) ? 0 : args.length;
        File indexDir = new File(argCount > 0 ? args[0] : DEFAULT_INDEX_DIR);
        File dataDir = new File(argCount > 1 ? args[1] : DEFAULT_DATA_DIR);

        // OpenMode.CREATE wipes an existing index, so refuse to start when
        // there is nothing to index rather than replace it with an empty one.
        if (!dataDir.exists() || !dataDir.canRead()) {
            System.err.println("Data directory '" + dataDir.getAbsolutePath()
                    + "' does not exist or is not readable, please check the path");
            return;
        }

        try {
            Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_43);
            IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, luceneAnalyzer);
            conf.setOpenMode(OpenMode.CREATE);

            long start;
            Directory indexStorePos = FSDirectory.open(indexDir);
            try {
                IndexWriter indexWriter = new IndexWriter(indexStorePos, conf);
                try {
                    start = System.currentTimeMillis();
                    indexDocs(indexWriter, dataDir);
                } finally {
                    // Always release the writer (and its write lock), even
                    // when indexing fails part-way through.
                    indexWriter.close();
                }
            } finally {
                indexStorePos.close();
            }

            // Measured after close() so the final commit is included.
            long end = System.currentTimeMillis();
            System.out.println(end - start + " total milliseconds");
        } catch (IOException e) {
            System.err.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        }
    }

    /**
     * Indexes the given file, or, if it is a directory, recurses into every
     * entry it contains. Unreadable files and files that cannot be opened are
     * skipped silently.
     *
     * @param writer writer for the index the documents are added to
     * @param file   file to index, or directory to walk
     * @throws IOException if listing, reading or writing to the index fails
     */
    static void indexDocs(IndexWriter writer, File file) throws IOException {
        // do not try to index files that cannot be read
        if (!file.canRead()) {
            return;
        }

        if (file.isDirectory()) {
            String[] children = file.list();
            // list() returns null when an I/O error occurs
            if (children != null) {
                for (String child : children) {
                    indexDocs(writer, new File(file, child));
                }
            }
            return;
        }

        FileInputStream fis;
        try {
            fis = new FileInputStream(file);
        } catch (FileNotFoundException ignored) {
            // At least on Windows, some temporary files raise this exception
            // with an "access denied" message; checking canRead() beforehand
            // does not help, so such files are skipped on purpose.
            return;
        }

        try {
            // make a new, empty document
            Document doc = new Document();

            // "path": indexed (searchable) as a single un-tokenized term,
            // and stored so it can be shown in search results.
            Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
            doc.add(pathField);

            // "modified": last-modified time as a LongField, indexed but not
            // stored. Millisecond resolution is often too fine; a coarser
            // number (e.g. 2011021714 for February 17, 2011, 2-3 PM) could
            // be used instead.
            doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

            // "contents": supplied as a Reader so the text is tokenized and
            // indexed but not stored. The bytes are decoded as UTF-8; if the
            // file uses another encoding, searching for special characters
            // will fail.
            doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                // New index, so no old copy of the document can exist.
                System.out.println("adding " + file);
                writer.addDocument(doc);
            } else {
                // Existing index: replace any document previously indexed
                // under exactly the same path.
                System.out.println("updating " + file);
                writer.updateDocument(new Term("path", file.getPath()), doc);
            }
        } finally {
            // The Reader is consumed during addDocument/updateDocument, so
            // the stream can be closed as soon as that call returns.
            fis.close();
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: