最简单的支持中文的示例
2005-06-04 22:34
891 查看
运行前需要先设置 classpath，使其包含 lucene-*.*.jar 以及 CJKAnalyzer.class 及其相关类所在的包。
建立索引:
import java.io.*;
import java.text.*;
import java.util.*;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
/**
 * Builds a Lucene index of MP3 file names, analyzed with {@code CJKAnalyzer}
 * so that Chinese/Japanese/Korean file names can be searched.
 *
 * <p>The directory tree under {@link #mp3Path} is walked recursively and one
 * document is added per MP3 file. The index is written to {@link #indexPath}.
 */
public class Mp3Indexer
{
    public final static String mp3Path="E://悦耳_music//new songs"; // directory containing the MP3 files
    public final static String indexPath="d://mp3Indexer"; // directory where the index is stored

    /**
     * Creates the index, adds every MP3 file found under {@link #mp3Path},
     * optimizes the index and closes it.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws ClassNotFoundException, IOException{
        try {
            // Third argument "true": create a new index (see the Lucene IndexWriter constructor docs).
            IndexWriter writer = new IndexWriter(indexPath, new CJKAnalyzer(), true);
            try {
                indexMp3s(writer, new File(mp3Path));
                System.out.println("优化中....");
                writer.optimize();
            } finally {
                // Always release the writer, even when indexing fails part-way through.
                writer.close();
            }
        } catch (Exception e) {
            // Print the exception itself (class name + message); getMessage() alone may be null.
            System.out.println(e);
        }
    }

    /**
     * Recursively indexes {@code file}: directories are descended into, files whose
     * name ends in ".mp3" (compared case-insensitively) are added to the index, and
     * everything else is ignored.
     *
     * @param writer the open index writer that receives the documents
     * @param file   a directory to walk or a single file to consider
     * @throws Exception if adding a document to the index fails
     */
    public static void indexMp3s(IndexWriter writer, File file) throws Exception {
        if (file.isDirectory()) {
            String[] files = file.list();
            if (files == null) {
                // File.list() returns null when the directory cannot be read; skip it.
                return;
            }
            for (int i = 0; i < files.length; i++) {
                indexMp3s(writer, new File(file, files[i]));
            }
        }
        else if (hasMp3Extension(file.getName())) { // only MP3 files are indexed
            System.out.print("正在处理文件:" + file + " ....");
            Document doc = new Document();
            doc.add(Field.Text("name", file.getName())); // file name field, used for searching
            // Last-modified time, added via Field.UnIndexed.
            doc.add(Field.UnIndexed("modified", DateFormat.getDateTimeInstance().format(new Date(file.lastModified()))));
            writer.addDocument(doc);
            // Report completion only after the document has actually been added.
            System.out.println("[处理完成]");
        }
    }

    /** Returns true when {@code name} ends with ".mp3", ignoring case. */
    private static boolean hasMp3Extension(String name) {
        final String ext = ".mp3";
        // regionMatches returns false for a negative offset, so short names are handled.
        return name.regionMatches(true, name.length() - ext.length(), ext, 0, ext.length());
    }
} //end class
查询:
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;
import org.apache.lucene.queryParser.QueryParser;
/**
 * Interactive console search over the MP3 index stored in {@code d://mp3Indexer}.
 *
 * <p>Reads queries from standard input, parses them against the "name" field
 * with {@code CJKAnalyzer}, and prints the matching file names ten at a time.
 * An empty line or end of input terminates the program.
 */
class SearchFiles {
    /**
     * Runs the query loop until the user enters an empty line or input ends.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            Searcher searcher = new IndexSearcher("d://mp3Indexer");
            try {
                Analyzer analyzer = new CJKAnalyzer();
                BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
                final int HITS_PER_PAGE = 10;
                while (true) {
                    System.out.print("Query: ");
                    String line = in.readLine();
                    // readLine() returns null at end of input; a blank line also means "quit".
                    // (The original test `length() == -1` could never be true.)
                    if (line == null || line.trim().length() == 0) {
                        break;
                    }
                    Query query = QueryParser.parse(line, "name", analyzer);
                    System.out.println("Searching for: " + query.toString("name"));
                    Hits hits = searcher.search(query);
                    System.out.println(hits.length() + " total matching documents");
                    for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
                        int end = Math.min(hits.length(), start + HITS_PER_PAGE);
                        for (int i = start; i < end; i++) {
                            Document doc = hits.doc(i);
                            String path = doc.get("name");
                            if (path != null) {
                                System.out.println(i + ". " + path);
                            }
                        }
                        if (hits.length() > end) {
                            System.out.print("more (y/n) ? ");
                            line = in.readLine();
                            // Stop paging on end of input, an empty answer, or an answer starting with 'n'.
                            if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                                break;
                            }
                        }
                    }
                }
            } finally {
                // Release the searcher even when a query fails to parse or execute.
                searcher.close();
            }
        } catch (Exception e) {
            // "\n" (newline) replaces the literal "/n" that the original printed verbatim.
            System.out.println(" caught a " + e.getClass() +
                "\n with message: " + e.getMessage());
        }
    }
}
建立索引:
import java.io.*;
import java.text.*;
import java.util.*;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
/**
 * Builds a Lucene index of MP3 file names, analyzed with {@code CJKAnalyzer}
 * so that Chinese/Japanese/Korean file names can be searched.
 *
 * <p>The directory tree under {@link #mp3Path} is walked recursively and one
 * document is added per MP3 file. The index is written to {@link #indexPath}.
 */
public class Mp3Indexer
{
    public final static String mp3Path="E://悦耳_music//new songs"; // directory containing the MP3 files
    public final static String indexPath="d://mp3Indexer"; // directory where the index is stored

    /**
     * Creates the index, adds every MP3 file found under {@link #mp3Path},
     * optimizes the index and closes it.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws ClassNotFoundException, IOException{
        try {
            // Third argument "true": create a new index (see the Lucene IndexWriter constructor docs).
            IndexWriter writer = new IndexWriter(indexPath, new CJKAnalyzer(), true);
            try {
                indexMp3s(writer, new File(mp3Path));
                System.out.println("优化中....");
                writer.optimize();
            } finally {
                // Always release the writer, even when indexing fails part-way through.
                writer.close();
            }
        } catch (Exception e) {
            // Print the exception itself (class name + message); getMessage() alone may be null.
            System.out.println(e);
        }
    }

    /**
     * Recursively indexes {@code file}: directories are descended into, files whose
     * name ends in ".mp3" (compared case-insensitively) are added to the index, and
     * everything else is ignored.
     *
     * @param writer the open index writer that receives the documents
     * @param file   a directory to walk or a single file to consider
     * @throws Exception if adding a document to the index fails
     */
    public static void indexMp3s(IndexWriter writer, File file) throws Exception {
        if (file.isDirectory()) {
            String[] files = file.list();
            if (files == null) {
                // File.list() returns null when the directory cannot be read; skip it.
                return;
            }
            for (int i = 0; i < files.length; i++) {
                indexMp3s(writer, new File(file, files[i]));
            }
        }
        else if (hasMp3Extension(file.getName())) { // only MP3 files are indexed
            System.out.print("正在处理文件:" + file + " ....");
            Document doc = new Document();
            doc.add(Field.Text("name", file.getName())); // file name field, used for searching
            // Last-modified time, added via Field.UnIndexed.
            doc.add(Field.UnIndexed("modified", DateFormat.getDateTimeInstance().format(new Date(file.lastModified()))));
            writer.addDocument(doc);
            // Report completion only after the document has actually been added.
            System.out.println("[处理完成]");
        }
    }

    /** Returns true when {@code name} ends with ".mp3", ignoring case. */
    private static boolean hasMp3Extension(String name) {
        final String ext = ".mp3";
        // regionMatches returns false for a negative offset, so short names are handled.
        return name.regionMatches(true, name.length() - ext.length(), ext, 0, ext.length());
    }
} //end class
查询:
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;
import org.apache.lucene.queryParser.QueryParser;
/**
 * Interactive console search over the MP3 index stored in {@code d://mp3Indexer}.
 *
 * <p>Reads queries from standard input, parses them against the "name" field
 * with {@code CJKAnalyzer}, and prints the matching file names ten at a time.
 * An empty line or end of input terminates the program.
 */
class SearchFiles {
    /**
     * Runs the query loop until the user enters an empty line or input ends.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            Searcher searcher = new IndexSearcher("d://mp3Indexer");
            try {
                Analyzer analyzer = new CJKAnalyzer();
                BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
                final int HITS_PER_PAGE = 10;
                while (true) {
                    System.out.print("Query: ");
                    String line = in.readLine();
                    // readLine() returns null at end of input; a blank line also means "quit".
                    // (The original test `length() == -1` could never be true.)
                    if (line == null || line.trim().length() == 0) {
                        break;
                    }
                    Query query = QueryParser.parse(line, "name", analyzer);
                    System.out.println("Searching for: " + query.toString("name"));
                    Hits hits = searcher.search(query);
                    System.out.println(hits.length() + " total matching documents");
                    for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
                        int end = Math.min(hits.length(), start + HITS_PER_PAGE);
                        for (int i = start; i < end; i++) {
                            Document doc = hits.doc(i);
                            String path = doc.get("name");
                            if (path != null) {
                                System.out.println(i + ". " + path);
                            }
                        }
                        if (hits.length() > end) {
                            System.out.print("more (y/n) ? ");
                            line = in.readLine();
                            // Stop paging on end of input, an empty answer, or an answer starting with 'n'.
                            if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                                break;
                            }
                        }
                    }
                }
            } finally {
                // Release the searcher even when a query fails to parse or execute.
                searcher.close();
            }
        } catch (Exception e) {
            // "\n" (newline) replaces the literal "/n" that the original printed verbatim.
            System.out.println(" caught a " + e.getClass() +
                "\n with message: " + e.getMessage());
        }
    }
}
相关文章推荐
- 一个简单的汉字搜索匹配示例(支持拼音、首字母简写)
- 原生JavaScript简单的静态搜索过滤文字(只支持中文,暂无拼音搜索)
- 让APACHE支持CGI及简单用C写的CGI示例
- Fluid Mask 3.3.16 for Mac 最新版 独立运行/PS 插件 均完美支持 10.12 系统 简体中文汉化版 简单易用的抠图软件
- php实现支持中文的文件下载功能示例
- 支持中文的无组件文件上传:示例
- Qt写的简单的字符串加密算法(支持中文字符串加密)
- 简单的不用系统函数的加密解密函数,支持中文
- c++支持coroutine的简单示例
- Fluid Mask 3.3.16 for Mac 最新版 完美支持 10.12 系统 简体中文汉化版 简单易用的抠图软件
- Git Bash 不支持中文的简单解决办法
- 使用vue实现简单键盘的示例(支持移动端和pc端)
- php实现的简单中文验证码功能示例
- 通过简单设置让CACTI支持中文显示
- 5.QT制作编译器,可以简单支持中文编程
- Git Bash不支持中文的超简单解决办法
- [刷机教程] 刷中文recovery V880最简单的刷机教程,更新recovery,修复若干BUG,支持最新ROM
- FXLabel支持中文行距的简单修改
- python发送邮件示例(支持中文邮件标题)
- proxool 最简单的设置mysql 支持 中文的方式