您的位置:首页 > 其它

Lucene入门

2017-04-17 03:37 141 查看
/**
 * Introductory Lucene exercises: building an index, querying it in several ways,
 * updating and deleting documents, and highlighting matches.
 *
 * <p>Every test opens its own directory, writer or reader and releases them with
 * try-with-resources, so a failing test does not leave an open writer (and its lock)
 * or an open reader behind. The index locations are hard-coded drive paths, so the
 * tests can only run on a machine where those drives exist.
 */
public class LuceneTest {

    /**
     * Builds a document with an untokenized, stored {@code id} and a tokenized,
     * stored {@code content} field.
     */
    private static Document newDocument(String id, String content) {
        Document document = new Document();
        document.add(new StringField("id", id, Field.Store.YES));
        document.add(new TextField("content", content, Field.Store.YES));
        return document;
    }

    /** Builds a document whose {@code id} uses the given field type and whose content field is supplied. */
    private static Document newTypedDocument(FieldType idType, String id, Field content) {
        Document document = new Document();
        document.add(new Field("id", id, idType));
        document.add(content);
        return document;
    }

    /** Returns a field type that is indexed, stored and tokenized. */
    private static FieldType storedTokenizedType() {
        FieldType type = new FieldType();
        type.setIndexed(true);   // index the value
        type.setStored(true);    // keep the original value retrievable
        type.setTokenized(true); // run the value through the analyzer
        return type;
    }

    /** Prints the internal doc id, score, {@code id} and {@code content} of every hit. */
    private static void printScoredHits(IndexSearcher searcher, ScoreDoc[] hits) throws IOException {
        for (ScoreDoc hit : hits) {
            System.out.println(hit.doc);
            System.out.println(hit.score);
            Document doc = searcher.doc(hit.doc);
            System.out.println(doc.get("id"));
            System.out.println(doc.get("content"));
        }
    }

    /** Creates an index of two documents analyzed with {@code StandardAnalyzer}. */
    @Test
    public void test1() throws IOException {
        Document first = newDocument("1", "谷歌地图之父跳槽FaceBook");
        Document second = newDocument("2", "谷歌地图字符加盟FaceBook");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        try (FSDirectory directory = FSDirectory.open(new File("D:\\indexDir"));
             IndexWriter writer = new IndexWriter(directory, config)) {
            writer.addDocument(first);
            writer.addDocument(second);
            writer.commit();
        }
    }

    /** Adds one document to the same index, this time analyzed with {@code IKAnalyzer}. */
    @Test
    public void test2() throws IOException {
        Document document = newDocument("1", "谷歌地图之父加盟FaceBook");

        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        try (FSDirectory directory = FSDirectory.open(new File("D:\\indexDir"));
             IndexWriter writer = new IndexWriter(directory, config)) {
            writer.addDocument(document);
            writer.commit();
        }
    }

    /**
     * Creates the index used by the query tests: five documents, the last of which
     * carries a boosted content field. The writer is opened in CREATE mode.
     */
    @Test
    public void test3() throws IOException {
        FieldType idType = storedTokenizedType();

        List<Document> documents = new ArrayList<>();
        documents.add(newTypedDocument(idType, "1",
                new TextField("content", "传智播客之父跳槽FaceBook,这是真的吗?", Field.Store.YES)));
        documents.add(newTypedDocument(idType, "2",
                new TextField("content", "谷歌地图之父加盟FaceBook", Field.Store.YES)));
        documents.add(newTypedDocument(idType, "3",
                new TextField("content", "谷歌地图创始人拉斯离开谷歌加盟Facebook", Field.Store.YES)));
        documents.add(newTypedDocument(idType, "4",
                new TextField("content", "谷歌地图之父跳槽Facebook与Wave项目取消有关", Field.Store.YES)));

        // The fifth document's content is boosted at index time.
        TextField boosted = new TextField("content", "谷歌地图之父拉斯加盟社交网站Facebook", Field.Store.YES);
        boosted.setBoost(10);
        documents.add(newTypedDocument(idType, "5", boosted));

        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        try (FSDirectory directory = FSDirectory.open(new File("E:\\indexDir"));
             IndexWriter writer = new IndexWriter(directory, config)) {
            writer.addDocuments(documents);
            writer.commit();
        }
    }

    /** Parses a query against {@code content} and prints the doc id, id and content of each hit. */
    @Test
    public void test4() throws ParseException, IOException {
        QueryParser parser = new QueryParser("content", new IKAnalyzer());
        Query query = parser.parse("谷歌");
        try (FSDirectory directory = FSDirectory.open(new File("E:\\indexDir"));
             IndexReader reader = IndexReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
            System.out.println(topDocs.totalHits);
            for (ScoreDoc hit : topDocs.scoreDocs) {
                System.out.println(hit.doc);
                Document doc = searcher.doc(hit.doc);
                System.out.println(doc.get("id"));
                System.out.println(doc.get("content"));
            }
        }
    }

    /** Same as {@link #test4()} with a different search word and a one-line output per hit. */
    @Test
    public void test5() throws ParseException, IOException {
        QueryParser parser = new QueryParser("content", new IKAnalyzer());
        Query query = parser.parse("跳槽");
        try (FSDirectory directory = FSDirectory.open(new File("e:\\indexDir"));
             IndexReader reader = IndexReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
            System.out.println(topDocs.totalHits);
            for (ScoreDoc hit : topDocs.scoreDocs) {
                System.out.println(hit.doc);
                Document doc = searcher.doc(hit.doc);
                System.out.println(doc.get("id") + "-----------------" + doc.get("content"));
            }
        }
    }

    /**
     * Term query. The term text is used as-is (it is not passed through an analyzer here),
     * so it only matches if the index contains exactly this string as a single term.
     */
    @Test
    public void test6() throws IOException {
        TermQuery query = new TermQuery(new Term("content", "谷歌地图之父加盟FaceBook"));
        try (FSDirectory directory = FSDirectory.open(new File("E:\\indexDir"));
             IndexReader reader = IndexReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
            System.out.println(topDocs.totalHits);
            printScoredHits(searcher, topDocs.scoreDocs);
        }
    }

    /** Wildcard query: matches content terms that contain the given character anywhere. */
    @Test
    public void test7() throws IOException {
        WildcardQuery query = new WildcardQuery(new Term("content", "*" + "吗" + "*"));
        try (FSDirectory directory = FSDirectory.open(new File("E:\\indexDir"));
             IndexReader reader = IndexReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
            System.out.println("命中数目:" + topDocs.totalHits);
            for (ScoreDoc hit : topDocs.scoreDocs) {
                System.out.println("内置id:" + hit.doc);
                System.out.println("得分:" + hit.score);
                Document doc = searcher.doc(hit.doc);
                System.out.println(doc.get("id"));
                System.out.println(doc.get("content"));
            }
        }
    }

    /** Appends a document that has only an {@code id} field (no content) to the index. */
    @Test
    public void test8() throws IOException {
        Document document = new Document();
        document.add(new StringField("id", "1", Field.Store.YES));
        List<Document> documents = new ArrayList<>();
        documents.add(document);

        IKAnalyzer analyzer = new IKAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        try (FSDirectory directory = FSDirectory.open(new File("E:\\indexDir"));
             IndexWriter writer = new IndexWriter(directory, config)) {
            writer.addDocuments(documents);
            writer.commit();
        }
    }

    /** Fuzzy (similarity) query: content terms within two edits of the given term. */
    @Test
    public void test9() throws IOException {
        FuzzyQuery query = new FuzzyQuery(new Term("content", "胡歌"), 2);
        try (FSDirectory directory = FSDirectory.open(new File("e:\\indexDir"));
             IndexReader reader = IndexReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
            System.out.println(topDocs.totalHits);
            printScoredHits(searcher, topDocs.scoreDocs);
        }
    }

    /** Lists every document in the index. */
    @Test
    public void test10() throws IOException {
        MatchAllDocsQuery query = new MatchAllDocsQuery();
        try (FSDirectory directory = FSDirectory.open(new File("e://indexDir"));
             IndexReader reader = IndexReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
            printScoredHits(searcher, topDocs.scoreDocs);
        }
    }

    /**
     * Update: {@code updateDocument} is keyed on the term id=4 and supplies a new
     * document whose own id is "1".
     */
    @Test
    public void test11() throws IOException {
        Document replacement = newTypedDocument(storedTokenizedType(), "1",
                new TextField("content", "我就是祁双伟", Field.Store.YES));

        IKAnalyzer analyzer = new IKAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        try (FSDirectory directory = FSDirectory.open(new File("e:\\indexDir"));
             IndexWriter writer = new IndexWriter(directory, config)) {
            writer.updateDocument(new Term("id", "4"), replacement);
            writer.commit();
        }
    }

    /** Deletes every document from the index. */
    @Test
    public void test12() throws IOException {
        IKAnalyzer analyzer = new IKAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        try (FSDirectory directory = FSDirectory.open(new File("e:\\indexDir"));
             IndexWriter writer = new IndexWriter(directory, config)) {
            writer.deleteAll();
            writer.commit();
        }
    }

    /**
     * Highlighting: searches {@code content}, sorts the hits by {@code id} ascending and
     * prints each hit's best fragment with the matched words wrapped in a red font tag.
     */
    @Test
    public void test13() throws ParseException, IOException, InvalidTokenOffsetsException {
        QueryParser parser = new QueryParser("content", new IKAnalyzer());
        Query query = parser.parse("谷歌");
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        Sort sort = new Sort(new SortField("id", SortField.Type.LONG, false));
        // One analyzer is enough for all fragments; no need to build one per hit.
        Analyzer fragmentAnalyzer = new IKAnalyzer();

        try (FSDirectory directory = FSDirectory.open(new File("E:\\indexDir"));
             IndexReader reader = IndexReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // A sorted search does not compute per-hit scores unless asked to; request them
            // (doDocScores = true) because the score is printed below. No filter, no max score.
            TopDocs topDocs = searcher.search(query, null, Integer.MAX_VALUE, sort, true, false);
            for (ScoreDoc hit : topDocs.scoreDocs) {
                System.out.println(hit.doc);
                Document document = searcher.doc(hit.doc);
                String content = document.get("content");
                String fragment = highlighter.getBestFragment(fragmentAnalyzer, "content", content);
                if (fragment == null) {
                    // The highlighter found nothing to mark; show the raw text instead of "null".
                    fragment = content;
                }
                System.out.println(fragment + "得分是:" + hit.score);
            }
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  lucene