您的位置:首页 > 其它

IKAnalyzer 基于Lucene4.2 的开发案例

2013-10-22 16:21 330 查看
基于IKAnalyzer 的lucene案例 

用于4.2版本的lucene

package test;

import java.io.File;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.TextField;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.index.Term;

import org.apache.lucene.search.BooleanQuery;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.Sort;

import org.apache.lucene.search.TermQuery;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.Version;

import org.wltea.analyzer.lucene.IKAnalyzer;

public class Searcher {

Analyzer analyzer =  new IKAnalyzer();
private static Directory dir = null;
private static String str = "根据作者官方说法IK分词器采用“正向迭代最细粒度切分算法”,分析它的源代码,可以看到分词工具类IKQueryParser起至关重要的作用," +
"它对搜索关键词采用从最大词到最小词层层迭代检索方式切分," +
"比如搜索词:“中华人民共和国成立了”,首先到词库中检索该搜索词中最大分割词," +
"即分割为:“中华人民共和国”和“成立了”,然后对“中华人民共和国”切分为“中华人民”和“" +
"人民共和国”,以此类推。最后,“中华人民共和国成立了”切分为:";

private static IndexReader reader = null;

public Searcher(){

try {
dir = FSDirectory.open(new File("E:\\lucene4.2\\index_new"));
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}

public void index(){
IndexWriter indexWriter = null;
try {
indexWriter = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_42, analyzer));
Document doc = new Document();
doc.add(new TextField("strs", str, Field.Store.YES));
indexWriter.addDocument(doc);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
if(indexWriter != null ){
try {
indexWriter.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
public void search(String query){
try {
reader =IndexReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);

BooleanQuery queryfinal = new BooleanQuery();
TermQuery Tquery = new TermQuery(new Term("strs", query));

TopDocs tdoc = searcher.search(Tquery,10);
// 一 打文件的数量
ScoreDoc[] hits=tdoc.scoreDocs;
for(int i=0;i<hits.length;i++){
Document doc = searcher.doc(hits[i].doc);
System.out.println(doc.toString());
}

} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

}
public void check(){
try {
IndexReader reader = IndexReader.open(dir);
System.out.println(reader.numDocs());
for(int i=0;i<reader.numDocs();i++){
System.out.println(reader.document(i));
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}

public static void main(String[] args) {
new Searcher().check();
}

}

package test;

import org.junit.Test;

public class testAnalyzer {
private static Searcher searcher=new Searcher();   

    @Test  

    public void Testindex()  

    {  

    searcher.index();  

    }  

    @Test  

    public void Testsearch()  

    {  

    searcher.search("分词");  

        System.out.println("Are you Success");  

    }  

}

本案例所用的 jar 包如下:
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息