您的位置:首页 > 其它

搜索引擎智能提示的实现-基于Lucene拼写检查库

2015-02-01 21:47 531 查看
package lia.tools;

/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.io.File;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;

//创建拼写检查库
// From chapter 8
/**
 * Command-line tool that builds a spell-checker index from the terms stored
 * in one field of an existing Lucene index.
 *
 * <p>Expects exactly three arguments: the directory to hold the spell-checker
 * index, the directory of the source index, and the name of the source field.
 */
public class CreateSpellCheckerIndex {

  /**
   * Builds the spell-checker index and reports how long it took.
   *
   * @param args {@code SpellCheckerIndexDir IndexDir IndexField}
   * @throws IOException if either index directory cannot be opened, read or written
   */
  public static void main(String[] args) throws IOException {

    if (args.length != 3) {
      System.out.println("Usage: java lia.tools.CreateSpellCheckerIndex SpellCheckerIndexDir IndexDir IndexField");
      System.exit(1);
    }

    String spellCheckDir = args[0];
    String indexDir = args[1];
    String indexField = args[2];

    System.out.println("Now build SpellChecker index...");
    long startTime;

    // Every resource is released in a finally block, in reverse order of
    // acquisition, so a failure at any step does not leak open directories.
    Directory dir = FSDirectory.open(new File(spellCheckDir));
    try {
      SpellChecker spell = new SpellChecker(dir);     //#A
      try {
        startTime = System.currentTimeMillis();

        Directory dir2 = FSDirectory.open(new File(indexDir));
        try {
          IndexReader r = IndexReader.open(dir2);     //#B
          try {
            spell.indexDictionary(new LuceneDictionary(r, indexField));  //#C
          } finally {
            r.close();
          }
        } finally {
          dir2.close();
        }
      } finally {
        spell.close();
      }
    } finally {
      dir.close();
    }

    // As in the original flow, the elapsed time includes closing the resources.
    long endTime = System.currentTimeMillis();
    System.out.println("  took " + (endTime - startTime) + " milliseconds");
  }
}
/*
#A Create SpellChecker on its directory 创建拼写检查库
#B Open IndexReader containing words to add to spell dictionary 打开包含待添加到拼写词典的单词的IndexReader
#C Add all words from the specified fields into the spell checker index 添加索引指定域单词到拼写检查库
*/
package lia.tools;

/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.io.File;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.search.spell.JaroWinklerDistance;
import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.index.IndexReader;

//搜索关键字拼写智能提示的实现,通过拼写检查库
// From chapter 8
/**
 * Command-line tool that prints up to five respelling suggestions for a word,
 * looked up in an existing spell-checker index.
 *
 * <p>Expects exactly two arguments: the spell-checker index directory and the
 * word to respell.
 */
public class SpellCheckerExample {

  /**
   * Looks up suggestions for the given word and prints them to standard output.
   * Exits with status 1 when the arguments are wrong or no index exists at the
   * given path.
   *
   * @param args {@code SpellCheckerIndexDir wordToRespell}
   * @throws IOException if the spell-checker index cannot be opened or read
   */
  public static void main(String[] args) throws IOException {

    if (args.length != 2) {
      System.out.println("Usage: java lia.tools.SpellCheckerExample SpellCheckerIndexDir wordToRespell");
      System.exit(1);
    }

    String spellCheckDir = args[0];
    String wordToRespell = args[1];

    boolean indexFound;
    Directory dir = FSDirectory.open(new File(spellCheckDir));
    try {
      indexFound = IndexReader.indexExists(dir);
      if (indexFound) {
        SpellChecker spell = new SpellChecker(dir);  //#A
        try {
          spell.setStringDistance(new LevensteinDistance());  //#B
          // Alternative metric: spell.setStringDistance(new JaroWinklerDistance());

          String[] suggestions = spell.suggestSimilar(wordToRespell, 5); //#C
          System.out.println(suggestions.length + " suggestions for '" + wordToRespell + "':");
          for (String suggestion : suggestions) {
            System.out.println("  " + suggestion);
          }
        } finally {
          spell.close();
        }
      }
    } finally {
      dir.close();
    }

    // Exit only after the directory is closed: System.exit() inside the try
    // block would skip the finally clause.
    if (!indexFound) {
      System.out.println("\nERROR: No spellchecker index at path \"" +
          spellCheckDir +
          "\"; please run CreateSpellCheckerIndex first\n");
      System.exit(1);
    }
  }
}
/*
#A Create SpellCheck from existing spell check index
#B Sets the string distance metric used to rank the suggestions
#C Generate respelled candidates
*/
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: