您的位置:首页 > 其它

lucene.net2.9搜索Demo

2010-12-20 17:07 253 查看
上次用到lucene.net时,其版本还是2.4。现在升级到2.9后,以前的很多类、方法和属性已被标记为过时(deprecated),到3.0后将被彻底删除。所以现在把这些过时的类、方法和属性全部替换为新的写法,先做个查询demo。

开发环境:vs2010(.net4)+lucene.net2.9+盘古分词2.3.1及其高亮显示组件(也可以改用lucene自带的分词器和高亮显示组件)

具体代码如下:

/// <summary>
/// Runs the paged search demo: reads the index path, keywords and page number from the
/// page's text boxes, searches the "Title", "Content" and "keyName" fields, and writes
/// the elapsed time plus an HTML table of highlighted hits into lbInformation.
/// </summary>
protected void GetSearchPageDemo()
{
    Stopwatch watch = new Stopwatch();
    watch.Start(); // start timing

    string strPath = tbIndexPath.Text;      // directory that holds the index files
    string strKeyWords = tbKeyWords.Text;   // raw keywords typed by the user
    int pageIndex;                          // current page (1-based)
    int intToTalCount = 0;                  // total number of hits
    int CurrenPage = 0;                     // total number of pages
    int pageSize = 10;                      // hits per page

    // An empty or non-numeric page box falls back to the first page instead of throwing.
    if (!int.TryParse(tbPageIndex.Text, out pageIndex))
    {
        pageIndex = 1;
    }
    pageIndex = Math.Max(1, pageIndex);

    // Keywords after segmentation, each term carrying its boost.
    strKeyWords = GetKeyWordsSplitBySpace(strKeyWords, new PanGuTokenizer());

    StringBuilder sb = new StringBuilder();
    IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo(strPath)), true);
    try
    {
        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new PanGuAnalyzer(true);    // PanGu word segmentation

        // Multi-field search: the same keyword string is parsed once per field.
        string[] quryFeiled = { strKeyWords, strKeyWords, strKeyWords };
        string[] filed = { "Title", "Content", "keyName" };
        Query query = MultiFieldQueryParser.Parse(Version.LUCENE_29, quryFeiled, filed, analyzer);
        // Single-field alternative:
        //QueryParser queryParser = new QueryParser(Version.LUCENE_29, "Title", analyzer);
        //Query quey = queryParser.Parse(strKeyWords);

        List<ScoreDoc> hits = GetScoreDocDemo1(query, searcher, pageIndex, pageSize, out intToTalCount);
        CurrenPage = intToTalCount % pageSize == 0 ? intToTalCount / pageSize : intToTalCount / pageSize + 1;
        pageIndex = Math.Min(CurrenPage, pageIndex);

        sb.AppendFormat("<p>{2}/{3} 总数:{0} 显示前{1}条记录</p>", intToTalCount, hits.Count, pageIndex.ToString(), CurrenPage.ToString());
        sb.Append("<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" width=\"100%\">");

        // PanGu highlighter; one instance is shared by every row.
        PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        PanGu.HighLight.Highlighter highlighter =
            new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
        highlighter.FragmentSize = 120; // size of the extracted text fragment

        for (int i = 0; i < hits.Count; i++)
        {
            Document doc = searcher.Doc(hits[i].doc);

            sb.Append("<tr>");
            // Show the plain title when the highlighter produced no fragment for it.
            string titele = highlighter.GetBestFragment(strKeyWords, doc.Get("Title"));
            sb.AppendFormat("<td><font size='5'>{0}</font></td>", string.IsNullOrEmpty(titele) ? doc.Get("Title") : titele);
            sb.AppendFormat("<td>{0}</td>", doc.Get("visits"));
            sb.AppendFormat("<td>{0}</td>", doc.Get("keyName"));
            sb.AppendFormat("<td>{0}</td>", doc.Get("Idate"));
            sb.AppendFormat("<td>{0}</td>", hits[i].score);
            sb.Append("</tr>");

            sb.Append("<tr>");
            sb.AppendFormat("<td colspan='5'><div style=\"padding:10px\">{0}</div></td>", highlighter.GetBestFragment(strKeyWords, doc.Get("Content")));
            sb.Append("</tr>");
        }
        sb.Append("</table>");
    }
    finally
    {
        // Release the index reader even when parsing or searching throws.
        reader.Close();
    }

    watch.Stop();
    lbInformation.Text = "用时:" + watch.Elapsed.Hours + ":" + watch.Elapsed.Minutes + ":" + watch.Elapsed.Seconds + ":" + watch.Elapsed.Milliseconds + sb.ToString();
}
/// <summary>
/// Gets one page of Lucene search results using TopScoreDocCollector + TopDocs.
/// </summary>
/// <param name="query">Query to execute.</param>
/// <param name="searcher">Searcher the query is run against.</param>
/// <param name="pageIndex">Current page (1-based). Values below 1 are treated as 1; values past the last page are clamped to the last page.</param>
/// <param name="pageSize">Page size; must be at least 1.</param>
/// <param name="intToTalCount">Receives the total number of hits reported by the collector.</param>
/// <returns>The hits of the requested page, or an empty list when nothing matched.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="pageSize"/> is less than 1.</exception>
protected List<ScoreDoc> GetScoreDocDemo1(Query query, Searcher searcher, int pageIndex, int pageSize, out int intToTalCount)
{
    if (pageSize < 1)
    {
        throw new ArgumentOutOfRangeException("pageSize", "pageSize must be at least 1.");
    }
    pageIndex = Math.Max(1, pageIndex);

    // Only the hits up to the end of the requested page have to be kept, so the collector
    // is sized to that instead of to the whole index. When the requested page lies past
    // the last one, this bound is >= the total hit count, so every hit is still collected
    // and the clamp below can serve the real last page.
    long wanted = (long)pageIndex * pageSize;
    int numHits = (int)Math.Max(1L, Math.Min(wanted, (long)searcher.MaxDoc()));

    TopScoreDocCollector collector = TopScoreDocCollector.create(numHits, true);
    searcher.Search(query, collector);
    intToTalCount = collector.GetTotalHits();
    if (intToTalCount < 1)
    {
        return new List<ScoreDoc>();
    }

    int lastPage = intToTalCount % pageSize == 0 ? intToTalCount / pageSize : intToTalCount / pageSize + 1;
    pageIndex = Math.Min(lastPage, pageIndex);

    // Slice out the records of the requested page.
    TopDocs topdoce = collector.TopDocs((pageIndex - 1) * pageSize, pageSize);
    ScoreDoc[] hits = topdoce.scoreDocs;
    return hits.ToList();
}
/// <summary>
/// Gets one page of Lucene search results using TopDocs only.
/// </summary>
/// <param name="query">Query to execute.</param>
/// <param name="searcher">Searcher the query is run against.</param>
/// <param name="pageIndex">Current page (1-based). Values below 1 are treated as 1; values past the last page are clamped to the last page.</param>
/// <param name="pageSize">Page size; must be at least 1.</param>
/// <param name="intToTalCount">Receives the total number of hits reported by the search.</param>
/// <returns>The hits of the requested page, or an empty list when nothing matched.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="pageSize"/> is less than 1.</exception>
protected List<ScoreDoc> GetScoreDocDemo2(Query query, Searcher searcher, int pageIndex, int pageSize, out int intToTalCount)
{
    if (pageSize < 1)
    {
        throw new ArgumentOutOfRangeException("pageSize", "pageSize must be at least 1.");
    }
    pageIndex = Math.Max(1, pageIndex);

    // Fetch everything up to the end of the requested page (capped so the product cannot overflow int).
    int fetchCount = (int)Math.Min((long)pageIndex * pageSize, (long)int.MaxValue);
    TopDocs topDocs = searcher.Search(query, (Filter)null, fetchCount);
    ScoreDoc[] hits = topDocs.scoreDocs;
    intToTalCount = topDocs.totalHits;
    if (intToTalCount < 1)
    {
        return new List<ScoreDoc>();
    }

    // Clamp to the last page (as GetScoreDocDemo3 does) so a page number past the end
    // yields the real last page rather than a window that straddles two pages.
    int lastPage = intToTalCount % pageSize == 0 ? intToTalCount / pageSize : intToTalCount / pageSize + 1;
    pageIndex = Math.Min(lastPage, pageIndex);

    // The page is the window starting right after the preceding pages.
    return hits.Skip((pageIndex - 1) * pageSize).Take(pageSize).ToList();
}
/// <summary>
/// Gets one page of sorted Lucene search results using TopDocs.
/// </summary>
/// <param name="query">Query to execute.</param>
/// <param name="searcher">Searcher the query is run against.</param>
/// <param name="sort">Sort passed through to the search call.</param>
/// <param name="pageIndex">Current page; clamped down to the last page when it lies past it.</param>
/// <param name="pageSize">Page size.</param>
/// <param name="intToTalCount">Receives the total number of hits reported by the search.</param>
/// <returns>The trailing hits of the fetched result set that make up the requested page, or an empty list when nothing matched.</returns>
protected List<ScoreDoc> GetScoreDocDemo3(Query query, Searcher searcher, Sort sort, int pageIndex, int pageSize, out int intToTalCount)
{
    // Ask for every hit up to the end of the requested page.
    TopDocs sortedDocs = searcher.Search(query, (Filter)null, pageIndex * pageSize, sort);
    ScoreDoc[] fetched = sortedDocs.scoreDocs;
    intToTalCount = sortedDocs.totalHits;
    if (intToTalCount < 1)
    {
        return new List<ScoreDoc>();
    }

    // Number of pages, rounding a partial last page up.
    int pageCount = intToTalCount / pageSize;
    if (intToTalCount % pageSize != 0)
    {
        pageCount++;
    }

    // The last page may be shorter than a full page; every other page holds pageSize hits.
    int requestedPage = Math.Min(pageCount, pageIndex);
    int keep = requestedPage == pageCount
        ? intToTalCount - (pageCount - 1) * pageSize
        : pageSize;

    // The requested page is the tail of what was fetched: drop everything before the last 'keep' hits.
    return fetched.Skip(fetched.Length - keep).ToList();
}
/// <summary>
/// Splits the keywords into segmented words and joins them with spaces, each word
/// suffixed with a boost of the form "^N.0" where N is 3 raised to the word's Rank
/// (truncated to an integer).
/// </summary>
/// <param name="keywords">Raw keyword text to segment.</param>
/// <param name="ktTokenizer">Tokenizer whose SegmentToWordInfos performs the segmentation.</param>
/// <returns>The space-separated boosted terms, trimmed; empty when no words were produced.</returns>
public string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
{
    List<string> boostedTerms = new List<string>();

    foreach (WordInfo segment in ktTokenizer.SegmentToWordInfos(keywords))
    {
        // Null entries are skipped.
        if (segment != null)
        {
            int boost = (int)Math.Pow(3, segment.Rank);
            boostedTerms.Add(string.Format("{0}^{1}.0", segment.Word, boost));
        }
    }

    return string.Join(" ", boostedTerms.ToArray()).Trim();
}


应用的网站上的具体效果如下图,也可以进入网站试试具体效果:lucene.net模糊查询



内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: