您的位置:首页 > 其它

Lucene 分析器(Analyzer)分析

2014-05-07 17:26 232 查看
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.Version;

/**
 * Small command-line demo that prints the tokens a Lucene {@link Analyzer}
 * produces for a piece of text.
 */
public class AnalyzerTest {

    /**
     * Tokenizes {@code txt} with {@code analyzer} and prints, for every token,
     * its start/end offsets followed by its term text to standard output.
     *
     * <p>The token stream is consumed following the documented workflow
     * (reset, incrementToken, end, close). The analyzer itself is not closed
     * here; that remains the caller's responsibility.
     *
     * @param analyzer analyzer used to create the token stream
     * @param txt      text to tokenize
     * @throws IOException if creating or consuming the token stream fails
     */
    public static void analysis(Analyzer analyzer, String txt) throws IOException {
        System.out.println("analyzer:" + analyzer.getClass());
        // try-with-resources guarantees close() even if tokenization throws;
        // without it the analyzer's reusable stream is left in an unclosed state.
        try (TokenStream stream = analyzer.tokenStream("content", new StringReader(txt))) {
            // Attribute instances are stable for the lifetime of the stream, so
            // fetch them once up front instead of on every token.
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
            OffsetAttribute offsetAttr = stream.addAttribute(OffsetAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println("off:" + offsetAttr.startOffset() + "----" + offsetAttr.endOffset());
                System.out.println("attr:" + termAttr.toString());
            }
            // Signals end-of-stream so final offset state is set before close().
            stream.end();
        }
    }

    /**
     * Runs the demo: prints some basic facts about an English sample string,
     * then tokenizes a mixed Chinese/English sample with {@link SimpleAnalyzer}.
     *
     * @param args ignored
     * @throws IOException if tokenization fails
     */
    public static void main(String[] args) throws IOException {
        String txt = "this is a txt";
        System.out.println("textLength:" + txt.length());
        // Build the label from the same indices passed to substring so the
        // printed range can never disagree with the slice actually shown.
        int from = 5;
        int to = 7;
        System.out.println(from + "-" + to + ":" + txt.substring(from, to));

        String zhTxt = "这是中文测试,hello  中文 The i am i am";
        // Other Analyzer implementations (e.g. StandardAnalyzer) can be
        // substituted here to compare how they tokenize the same input.
        try (Analyzer a = new SimpleAnalyzer(Version.LUCENE_48)) {
            analysis(a, zhTxt);
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  lucene