Lucene_demo09_txt文件索引
2015-10-16 09:09
260 查看
Java代码
/**
 * Indexes the plain-text ({@code .txt}) files of a directory into a Lucene index.
 *
 * <p>Runs as a JUnit test: {@link #init()} opens the index directory and the writer,
 * and {@link #createIndex()} adds one document per text file and then releases both.
 */
public class IndexFile {
    /** File-name suffix that marks a file as a text file. */
    private static final String TXT_SUFFIX = ".txt";

    private Directory directory;
    private String indexPath = "D://lucene/index"; // directory where the index files are written
    private String dirPath = "D://lucene/data"; // directory holding the .txt source files
    private IndexWriter indexWriter;

    /**
     * Collects the regular {@code .txt} files that sit directly inside a directory.
     *
     * @param dirPath path of the directory to scan (not recursive)
     * @return the matching files; empty when the path is not a readable directory
     */
    public List<File> getFileList(String dirPath) {
        List<File> fileList = new ArrayList<File>();
        // listFiles() returns null when the path is missing, is not a directory,
        // or cannot be read; treat that as "nothing to index" instead of an NPE.
        File[] files = new File(dirPath).listFiles();
        if (files == null) {
            return fileList;
        }
        for (File file : files) {
            // Skip sub-directories that merely happen to be named "*.txt".
            if (file.isFile() && isTxtFile(file.getName())) {
                fileList.add(file);
            }
        }
        return fileList;
    }

    /**
     * Tells whether a file name denotes a text file.
     *
     * @param fileName bare file name, may be {@code null}
     * @return {@code true} when the name ends with {@code .txt} and has a non-empty
     *         base name (a file called just ".txt" is rejected, as before)
     */
    public boolean isTxtFile(String fileName) {
        // endsWith, not lastIndexOf: "notes.txt.bak" must not count as a text file.
        return fileName != null
                && fileName.length() > TXT_SUFFIX.length()
                && fileName.endsWith(TXT_SUFFIX);
    }

    /**
     * Converts a text file into a Lucene {@code Document} with the stored and
     * analyzed fields {@code filename}, {@code content} and {@code size}.
     *
     * @param file the text file to convert
     * @return the populated document
     * @throws Exception if the file content cannot be read
     */
    public Document fileToDocument(File file) throws Exception {
        Document document = new Document();
        document.add(new Field("filename", file.getName(), Store.YES, Index.ANALYZED));
        document.add(new Field("content", getFileContent(file), Store.YES, Index.ANALYZED));
        // length() is the size of this file in bytes; getTotalSpace() would report
        // the capacity of the whole partition the file lives on.
        document.add(new Field("size", String.valueOf(file.length()), Store.YES, Index.ANALYZED));
        return document;
    }

    /**
     * Creates an {@code IndexWriter} over the given directory using
     * {@code LuceneUtils.analyzer} and {@code MaxFieldLength.LIMITED}.
     *
     * @param dir the index directory
     * @return a new writer; the caller is responsible for closing it
     * @throws Exception if the writer cannot be created
     */
    public IndexWriter getIndexWriter(Directory dir) throws Exception {
        // NOTE(review): this constructor presumably appends to an existing index,
        // so re-running the test would add duplicate documents - confirm.
        return new IndexWriter(dir, LuceneUtils.analyzer, MaxFieldLength.LIMITED);
    }

    /**
     * Closes the index writer held by this instance, if one was opened.
     *
     * @throws Exception if closing the writer fails
     */
    public void closeWriter() throws Exception {
        if (indexWriter != null) {
            indexWriter.close();
        }
    }

    /**
     * Reads a whole file as UTF-8 text.
     *
     * @param file the file to read
     * @return the file content with every line preceded by a {@code '\n'};
     *         the empty string for an empty file
     * @throws Exception if the file cannot be opened or read
     */
    public String getFileContent(File file) throws Exception {
        FileInputStream in = new FileInputStream(file);
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            StringBuilder content = new StringBuilder();
            String line;
            // Read each line exactly once; calling readLine() both in the loop
            // condition and in the body would drop every other line.
            while ((line = br.readLine()) != null) {
                content.append('\n').append(line);
            }
            return content.toString();
        } finally {
            // Closing the underlying stream releases the file handle even when
            // reading fails part-way through.
            in.close();
        }
    }

    /**
     * Opens the index directory and the index writer before each test.
     *
     * @throws IllegalStateException if the index cannot be opened
     */
    @Before
    public void init() {
        try {
            directory = FSDirectory.open(new File(indexPath));
            indexWriter = getIndexWriter(directory);
        } catch (Exception e) {
            // Fail here with the real cause rather than letting the test hit a
            // NullPointerException on the unset writer later on.
            throw new IllegalStateException("索引打开异常!", e);
        }
    }

    /**
     * Builds the index: adds one document per text file found in the data
     * directory, commits once, and releases the writer and the directory.
     *
     * @throws Exception if reading a file or writing the index fails
     */
    @Test
    public void createIndex() throws Exception {
        try {
            for (File file : getFileList(dirPath)) {
                Document document = fileToDocument(file);
                indexWriter.addDocument(document);
                System.out.println("filename:" + document.get("filename"));
                System.err.println("content:" + document.get("content"));
            }
            // One commit for the whole batch instead of one per document.
            indexWriter.commit();
        } finally {
            // Always release the writer (and with it the index write lock),
            // then the directory, even when indexing failed.
            try {
                closeWriter();
            } finally {
                if (directory != null) {
                    directory.close();
                }
            }
        }
    }
}
/**
 * Indexes the plain-text ({@code .txt}) files of a directory into a Lucene index.
 *
 * <p>Runs as a JUnit test: {@link #init()} opens the index directory and the writer,
 * and {@link #createIndex()} adds one document per text file and then releases both.
 */
public class IndexFile {
    /** File-name suffix that marks a file as a text file. */
    private static final String TXT_SUFFIX = ".txt";

    private Directory directory;
    private String indexPath = "D://lucene/index"; // directory where the index files are written
    private String dirPath = "D://lucene/data"; // directory holding the .txt source files
    private IndexWriter indexWriter;

    /**
     * Collects the regular {@code .txt} files that sit directly inside a directory.
     *
     * @param dirPath path of the directory to scan (not recursive)
     * @return the matching files; empty when the path is not a readable directory
     */
    public List<File> getFileList(String dirPath) {
        List<File> fileList = new ArrayList<File>();
        // listFiles() returns null when the path is missing, is not a directory,
        // or cannot be read; treat that as "nothing to index" instead of an NPE.
        File[] files = new File(dirPath).listFiles();
        if (files == null) {
            return fileList;
        }
        for (File file : files) {
            // Skip sub-directories that merely happen to be named "*.txt".
            if (file.isFile() && isTxtFile(file.getName())) {
                fileList.add(file);
            }
        }
        return fileList;
    }

    /**
     * Tells whether a file name denotes a text file.
     *
     * @param fileName bare file name, may be {@code null}
     * @return {@code true} when the name ends with {@code .txt} and has a non-empty
     *         base name (a file called just ".txt" is rejected, as before)
     */
    public boolean isTxtFile(String fileName) {
        // endsWith, not lastIndexOf: "notes.txt.bak" must not count as a text file.
        return fileName != null
                && fileName.length() > TXT_SUFFIX.length()
                && fileName.endsWith(TXT_SUFFIX);
    }

    /**
     * Converts a text file into a Lucene {@code Document} with the stored and
     * analyzed fields {@code filename}, {@code content} and {@code size}.
     *
     * @param file the text file to convert
     * @return the populated document
     * @throws Exception if the file content cannot be read
     */
    public Document fileToDocument(File file) throws Exception {
        Document document = new Document();
        document.add(new Field("filename", file.getName(), Store.YES, Index.ANALYZED));
        document.add(new Field("content", getFileContent(file), Store.YES, Index.ANALYZED));
        // length() is the size of this file in bytes; getTotalSpace() would report
        // the capacity of the whole partition the file lives on.
        document.add(new Field("size", String.valueOf(file.length()), Store.YES, Index.ANALYZED));
        return document;
    }

    /**
     * Creates an {@code IndexWriter} over the given directory using
     * {@code LuceneUtils.analyzer} and {@code MaxFieldLength.LIMITED}.
     *
     * @param dir the index directory
     * @return a new writer; the caller is responsible for closing it
     * @throws Exception if the writer cannot be created
     */
    public IndexWriter getIndexWriter(Directory dir) throws Exception {
        // NOTE(review): this constructor presumably appends to an existing index,
        // so re-running the test would add duplicate documents - confirm.
        return new IndexWriter(dir, LuceneUtils.analyzer, MaxFieldLength.LIMITED);
    }

    /**
     * Closes the index writer held by this instance, if one was opened.
     *
     * @throws Exception if closing the writer fails
     */
    public void closeWriter() throws Exception {
        if (indexWriter != null) {
            indexWriter.close();
        }
    }

    /**
     * Reads a whole file as UTF-8 text.
     *
     * @param file the file to read
     * @return the file content with every line preceded by a {@code '\n'};
     *         the empty string for an empty file
     * @throws Exception if the file cannot be opened or read
     */
    public String getFileContent(File file) throws Exception {
        FileInputStream in = new FileInputStream(file);
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            StringBuilder content = new StringBuilder();
            String line;
            // Read each line exactly once; calling readLine() both in the loop
            // condition and in the body would drop every other line.
            while ((line = br.readLine()) != null) {
                content.append('\n').append(line);
            }
            return content.toString();
        } finally {
            // Closing the underlying stream releases the file handle even when
            // reading fails part-way through.
            in.close();
        }
    }

    /**
     * Opens the index directory and the index writer before each test.
     *
     * @throws IllegalStateException if the index cannot be opened
     */
    @Before
    public void init() {
        try {
            directory = FSDirectory.open(new File(indexPath));
            indexWriter = getIndexWriter(directory);
        } catch (Exception e) {
            // Fail here with the real cause rather than letting the test hit a
            // NullPointerException on the unset writer later on.
            throw new IllegalStateException("索引打开异常!", e);
        }
    }

    /**
     * Builds the index: adds one document per text file found in the data
     * directory, commits once, and releases the writer and the directory.
     *
     * @throws Exception if reading a file or writing the index fails
     */
    @Test
    public void createIndex() throws Exception {
        try {
            for (File file : getFileList(dirPath)) {
                Document document = fileToDocument(file);
                indexWriter.addDocument(document);
                System.out.println("filename:" + document.get("filename"));
                System.err.println("content:" + document.get("content"));
            }
            // One commit for the whole batch instead of one per document.
            indexWriter.commit();
        } finally {
            // Always release the writer (and with it the index write lock),
            // then the directory, even when indexing failed.
            try {
                closeWriter();
            } finally {
                if (directory != null) {
                    directory.close();
                }
            }
        }
    }
}
相关文章推荐
- Linux:32/64位程序(应用程序、共享库、内核模块)
- 签劳动合同后可以辞职吗
- jquery中的选择器
- 第6周 项目1-建立顺序栈算法库
- 如何滚动到 android 系统上的特定位置的 listview
- 源码推荐(10.16):自己写的App第一次启动的界面,仿造京东的套餐购买 cell展开
- Lucene_demo08_Highlighter高亮
- Lucene_demo07_Sort匹配度
- Android Studio导入第三方开源库
- c++输入输出
- 解决tableViewCell分割线不到左边界的问题
- 分散投资,多点开花,一旦出现市场变动,资金不至于大量流失
- 第六周项目2 建立链栈算法库
- 后缀表达式
- 第7周项目4 - 队列数组
- 第七周项目2—建立链队算法库
- 整理 iOS 9 适配中出现的坑(图文)
- Lucene简介
- Lucene_demo05_内存索引和文件索引
- 在Ubuntu下配置pyrouge