lucene.net2.9搜索Demo
2010-12-20 17:07
253 查看
上次使用lucene.net时,其版本还是2.4。升级到2.9之后,以前的很多类、方法和属性已被标记为过时(Obsolete),到3.0版本将被彻底删除。因此这里把这些已过时的类、方法和属性全部替换为新的API,先做一个查询demo。
开发环境:vs2010(.net4)+lucene.net2.9+盘古分词2.3.1和高亮显示(也可以用lucene自带的分词和高亮显示)
具体代码如下:
应用的网站上的具体效果如下图,也可以进入网站试试具体效果:lucene.net模糊查询
开发环境:vs2010(.net4)+lucene.net2.9+盘古分词2.3.1和高亮显示(也可以用lucene自带的分词和高亮显示)
具体代码如下:
/// <summary>
/// Runs a paged multi-field search ("Title", "Content", "keyName") against the
/// index at the path entered in <c>tbIndexPath</c>, highlights the matches with
/// PanGu and writes the elapsed time plus an HTML result table to
/// <c>lbInformation</c>.
/// </summary>
protected void GetSearchPageDemo()
{
    Stopwatch watch = new Stopwatch();
    watch.Start(); // start timing

    string strPath = tbIndexPath.Text;     // directory that holds the index files
    string strKeyWords = tbKeyWords.Text;  // raw keywords typed by the user
    int pageIndex = 1;                     // current page (1-based)
    int intToTalCount = 0;                 // total number of hits
    int CurrenPage = 0;                    // total number of pages
    int pageSize = 10;                     // hits per page

    // A blank or non-numeric page box falls back to page 1 instead of throwing.
    int requestedPage;
    if (int.TryParse(tbPageIndex.Text, out requestedPage))
    {
        pageIndex = requestedPage;
    }
    pageIndex = Math.Max(1, pageIndex);

    // Segment the keywords and attach a boost to every word.
    strKeyWords = GetKeyWordsSplitBySpace(strKeyWords, new PanGuTokenizer());

    StringBuilder sb = new StringBuilder();
    IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo(strPath)), true);
    try
    {
        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new PanGuAnalyzer(true); // PanGu word segmentation

        // Multi-field search: the same query text is applied to every field.
        string[] quryFeiled = { strKeyWords, strKeyWords, strKeyWords };
        string[] filed = { "Title", "Content", "keyName" };
        Query query = MultiFieldQueryParser.Parse(Version.LUCENE_29, quryFeiled, filed, analyzer);

        List<ScoreDoc> hits = GetScoreDocDemo1(query, searcher, pageIndex, pageSize, out intToTalCount);
        CurrenPage = CalcPageCount(intToTalCount, pageSize);
        pageIndex = Math.Min(CurrenPage, pageIndex);

        sb.AppendFormat("<p>{2}/{3} 总数:{0} 显示前{1}条记录</p>", intToTalCount, hits.Count, pageIndex.ToString(), CurrenPage.ToString());
        sb.Append("<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" width=\"100%\">");

        // Highlighting is done with the PanGu highlighter.
        PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        string titele = "";
        for (int i = 0; i < hits.Count; i++)
        {
            Document doc = searcher.Doc(hits[i].doc);
            PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
            highlighter.FragmentSize = 120; // size of the extracted fragment

            // NOTE(review): stored field values are emitted without HTML encoding;
            // confirm the indexed content is trusted or encode it before output.
            sb.Append("<tr>");
            titele = highlighter.GetBestFragment(strKeyWords, doc.Get("Title"));
            // Fall back to the plain title when the highlighter yields nothing.
            sb.AppendFormat("<td><font size='5'>{0}</font></td>", string.IsNullOrEmpty(titele) ? doc.Get("Title") : titele);
            sb.AppendFormat("<td>{0}</td>", doc.Get("visits"));
            sb.AppendFormat("<td>{0}</td>", doc.Get("keyName"));
            sb.AppendFormat("<td>{0}</td>", doc.Get("Idate"));
            sb.AppendFormat("<td>{0}</td>", hits[i].score);
            sb.Append("</tr>");
            sb.Append("<tr>");
            sb.AppendFormat("<td colspan='5'><div style='padding:10px'>{0}</div></td>", highlighter.GetBestFragment(strKeyWords, doc.Get("Content")));
            sb.Append("</tr>");
        }
        sb.Append("</table>");
    }
    finally
    {
        // Release the index even when parsing, searching or highlighting throws.
        reader.Close();
    }

    watch.Stop();
    lbInformation.Text = "用时:" + watch.Elapsed.Hours + ":" + watch.Elapsed.Minutes + ":" + watch.Elapsed.Seconds + ":" + watch.Elapsed.Milliseconds + sb.ToString();
}

/// <summary>
/// Number of pages needed to show <paramref name="totalCount"/> hits
/// (ceiling of totalCount / pageSize).
/// </summary>
private static int CalcPageCount(int totalCount, int pageSize)
{
    return totalCount % pageSize == 0 ? totalCount / pageSize : totalCount / pageSize + 1;
}

/// <summary>
/// Cuts one page out of the leading hits returned by a top-N search, clamping
/// the page number to the range 1..page count.
/// </summary>
private static List<ScoreDoc> TakePage(ScoreDoc[] hits, int pageIndex, int pageSize, int totalCount)
{
    int pageCount = CalcPageCount(totalCount, pageSize);
    int page = Math.Max(1, Math.Min(pageCount, pageIndex));
    return hits.Skip((page - 1) * pageSize).Take(pageSize).ToList();
}

/// <summary>
/// Gets one page of Lucene results using TopScoreDocCollector + TopDocs.
/// </summary>
/// <param name="query">Query</param>
/// <param name="searcher">Searcher</param>
/// <param name="pageIndex">Current page (1-based); clamped to 1..page count</param>
/// <param name="pageSize">Page size</param>
/// <param name="intToTalCount">Total number of hits</param>
/// <returns>The hits of the requested page; an empty list when nothing matches</returns>
protected List<ScoreDoc> GetScoreDocDemo1(Query query, Searcher searcher, int pageIndex, int pageSize, out int intToTalCount)
{
    pageIndex = Math.Max(1, pageIndex);

    // Only the first pageIndex * pageSize hits are needed to cut out the page,
    // so do not size the collector for the whole index.
    int collectCount = (int)Math.Min((long)searcher.MaxDoc(), (long)pageIndex * pageSize);
    TopScoreDocCollector collector = TopScoreDocCollector.create(collectCount, true);
    searcher.Search(query, collector);

    intToTalCount = collector.GetTotalHits();
    if (intToTalCount < 1)
    {
        return new List<ScoreDoc>();
    }

    pageIndex = Math.Min(CalcPageCount(intToTalCount, pageSize), pageIndex);
    TopDocs topdoce = collector.TopDocs((pageIndex - 1) * pageSize, pageSize); // hits of the requested page
    return topdoce.scoreDocs.ToList();
}

/// <summary>
/// Gets one page of Lucene results using TopDocs.
/// </summary>
/// <param name="query">Query</param>
/// <param name="searcher">Searcher</param>
/// <param name="pageIndex">Current page (1-based); clamped to 1..page count</param>
/// <param name="pageSize">Page size</param>
/// <param name="intToTalCount">Total number of hits</param>
/// <returns>The hits of the requested page; an empty list when nothing matches</returns>
protected List<ScoreDoc> GetScoreDocDemo2(Query query, Searcher searcher, int pageIndex, int pageSize, out int intToTalCount)
{
    pageIndex = Math.Max(1, pageIndex);
    TopDocs topDocs = searcher.Search(query, (Filter)null, pageIndex * pageSize);
    intToTalCount = topDocs.totalHits;
    if (intToTalCount < 1)
    {
        return new List<ScoreDoc>();
    }

    // A page number past the end is clamped to the last page, as in the other
    // GetScoreDocDemo variants.
    return TakePage(topDocs.scoreDocs, pageIndex, pageSize, intToTalCount);
}

/// <summary>
/// Gets one page of sorted Lucene results using TopDocs.
/// </summary>
/// <param name="query">Query</param>
/// <param name="searcher">Searcher</param>
/// <param name="sort">Sort order passed to the searcher</param>
/// <param name="pageIndex">Current page (1-based); clamped to 1..page count</param>
/// <param name="pageSize">Page size</param>
/// <param name="intToTalCount">Total number of hits</param>
/// <returns>The hits of the requested page; an empty list when nothing matches</returns>
protected List<ScoreDoc> GetScoreDocDemo3(Query query, Searcher searcher, Sort sort, int pageIndex, int pageSize, out int intToTalCount)
{
    pageIndex = Math.Max(1, pageIndex);
    TopDocs topDocs = searcher.Search(query, (Filter)null, pageIndex * pageSize, sort);
    intToTalCount = topDocs.totalHits;
    if (intToTalCount < 1)
    {
        return new List<ScoreDoc>();
    }

    return TakePage(topDocs.scoreDocs, pageIndex, pageSize, intToTalCount);
}

/// <summary>
/// Splits the keywords into words with the given tokenizer and joins them with
/// spaces, appending a boost of 3^Rank to every word ("word^N.0").
/// </summary>
/// <param name="keywords">Raw keyword text</param>
/// <param name="ktTokenizer">Tokenizer used to segment the keywords</param>
/// <returns>The boosted, space-separated words; an empty string for empty input</returns>
public string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
{
    if (string.IsNullOrEmpty(keywords))
    {
        return string.Empty;
    }

    StringBuilder result = new StringBuilder();
    ICollection<WordInfo> words = ktTokenizer.SegmentToWordInfos(keywords);
    foreach (WordInfo word in words)
    {
        if (word == null)
        {
            continue;
        }
        result.AppendFormat("{0}^{1}.0 ", word.Word, (int)Math.Pow(3, word.Rank));
    }
    return result.ToString().Trim();
}
应用的网站上的具体效果如下图,也可以进入网站试试具体效果:lucene.net模糊查询
相关文章推荐
- 完整的站内搜索Demo(Lucene.Net+盘古分词)
- 完整的站内搜索Demo(Lucene.Net+盘古分词)
- Lucene.Net2.9(中科院分词.net 版) DEMO
- Lucene.net全文搜索示例Demo
- 盘古分词在 Lucene.net 2.9 版本下搜索没有结果的原因分析及盘古分词2.0版本要开发的新功能
- 完整的站内搜索Demo(Lucene.Net+盘古分词)
- 完整的站内搜索Demo(Lucene.Net+盘古分词)
- 完整的站内搜索Demo(Lucene.Net+盘古分词)
- lucene.net 3.0.3、结合盘古分词进行搜索的小例子(分页功能)
- Lucene.Net 2.3.1开发介绍 —— 四、搜索(一)
- Lucene.net多字段 - 多索引目录搜索【转】
- 运行官方Lucene.net的demo
- 利用Lucene.net搭建站内搜索(2)---分词技术
- 利用Lucene.net搜索引擎进行多条件搜索的做法
- Lucene.net多字段(Fields)、多索引目录(IndexSearcher)搜索
- 使用Lucene.NET实现站内搜索
- Lucene.Net 2.3.1开发介绍 —— 四、搜索(二)
- lucene .NET 搜索图片 功能实现
- Lucene.NET建立,搜索多个索引文件
- lucene.net helper类 【结合盘古分词进行搜索的小例子(分页功能)】