您的位置:首页 > 其它

Lucene.Net 搜索及高亮分页

2008-04-03 10:34 489 查看
using System;
using System.Data;
using System.IO;
using System.Text.RegularExpressions;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Search.Highlight;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using System.Configuration;

namespace So
...{
public class BaseSearch : System.Web.UI.Page
...{
#region Field declarations
/// <summary>
/// Search result rows for the page being displayed.
/// </summary>
public DataTable Results = new DataTable();

/// <summary>
/// Index (into the hit list) of the first result shown on the current page.
/// </summary>
public int startAt;

/// <summary>
/// First item on page (user format).
/// </summary>
public int fromItem;

/// <summary>
/// Last item on page (user format).
/// </summary>
public int toItem;

/// <summary>
/// Total number of search results.
/// </summary>
public int total;

/// <summary>
/// Time the search took.
/// </summary>
public TimeSpan duration;

/// <summary>
/// Number of result items shown per page.
/// </summary>
public int maxResults = 10;

/// <summary>
/// Whether the page-cache feature is enabled.
/// Not referenced by any code in this listing.
/// </summary>
public bool EnableCache;

/// <summary>
/// Cache URL. Not referenced by any code in this listing.
/// </summary>
public string CacheURL;

/// <summary>
/// Path where the index files are stored.
/// (The misspelled name is kept because it is part of the public surface.)
/// </summary>
public string IndexDiectory;

/// <summary>
/// Backing field for the <c>Query</c> property.
/// </summary>
private string m_Query;

/// <summary>
/// Lucene directory to search; assigned by <c>GetIndexDir</c>.
/// </summary>
public Lucene.Net.Store.Directory dir;

#endregion Field declarations

#region Load the index to be searched
/// <summary>
/// Creates a <c>RAMDirectory</c> from the given index path and stores it in <c>dir</c>.
/// </summary>
/// <param name="IndexKey">
/// Key identifying the index. Currently unused: an earlier variant looked the
/// directory up in the page <c>Cache</c> under this key (2-minute absolute
/// expiry) before building a new one, but that code path is disabled.
/// </param>
/// <param name="IndexDiectory">Index path passed to the <c>RAMDirectory</c> constructor.</param>
public void GetIndexDir(string IndexKey, string IndexDiectory)
{
    Lucene.Net.Store.RAMDirectory freshDirectory = new Lucene.Net.Store.RAMDirectory(IndexDiectory);
    this.dir = freshDirectory;
}
#endregion

#region Truncate text to a fixed length
/// <summary>
/// Truncates a string to at most <paramref name="p_Length"/> characters and appends
/// <paramref name="p_ExtraText"/> when something was cut off.
/// </summary>
/// <param name="p_Text">Original text. Null or empty is returned unchanged.</param>
/// <param name="p_Length">Maximum number of characters to keep from <paramref name="p_Text"/>.</param>
/// <param name="p_ExtraText">Marker appended after the kept characters (for example "...").</param>
/// <returns>
/// <paramref name="p_Text"/> itself when it is not longer than <paramref name="p_Length"/>;
/// otherwise its first <paramref name="p_Length"/> characters followed by <paramref name="p_ExtraText"/>.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="p_Length"/> is negative and <paramref name="p_Text"/> is not empty.
/// </exception>
public string GetLengthText(string p_Text, int p_Length, string p_ExtraText)
{
    // Nothing to shorten: null, empty, or already within the limit.
    if (string.IsNullOrEmpty(p_Text) || p_Text.Length <= p_Length)
    {
        return p_Text;
    }

    // Cut at the requested length. The earlier code always cut at 45 characters,
    // which ignored p_Length and threw for texts shorter than 45.
    return p_Text.Substring(0, p_Length) + p_ExtraText;
}
#endregion

#region Smaller of two values
/// <summary>
/// Returns the smaller of two integers.
/// </summary>
/// <param name="first">First value.</param>
/// <param name="second">Second value.</param>
/// <returns>The lesser of <paramref name="first"/> and <paramref name="second"/>.</returns>
public int smallerOf(int first, int second)
{
    return Math.Min(first, second);
}
#endregion

#region Validate the start position
/// <summary>
/// Reads the "start" request parameter and turns it into a usable start index.
/// </summary>
/// <returns>
/// 0 when the parameter is missing, not a valid 32-bit integer, or negative;
/// <c>lastPageStartsAt</c> when the value is at or beyond <c>total - 1</c>;
/// otherwise the parsed value.
/// </returns>
/// <remarks>
/// Parsing uses <c>int.TryParse</c> rather than a catch-all handler, so only bad
/// input is mapped to 0. Unrelated failures are no longer silently swallowed; for
/// example <c>lastPageStartsAt</c> divides by <c>maxResults</c> and will now
/// surface a zero page size instead of hiding it.
/// </remarks>
public int initStartAt()
{
    int requestedStart;
    bool parsed = int.TryParse(this.Request.Params["start"], out requestedStart);

    // Missing, malformed, overflowing or negative value: show the first page.
    if (!parsed || requestedStart < 0)
    {
        return 0;
    }

    // Starting item too large: show the last page.
    if (requestedStart >= total - 1)
    {
        return lastPageStartsAt;
    }

    return requestedStart;
}
#endregion

#region First item of the last page

/// <summary>
/// Start index of the last page: <c>pageCount</c> multiplied by <c>maxResults</c>.
/// </summary>
public int lastPageStartsAt
{
    get
    {
        int lastPageIndex = this.pageCount;
        return lastPageIndex * this.maxResults;
    }
}

/// <summary>
/// <c>(total - 1) / maxResults</c> using integer division, i.e. the zero-based
/// index of the page that holds the last result (0 when <c>total</c> is 0,
/// because integer division truncates toward zero).
/// </summary>
public int pageCount
{
    get
    {
        int lastItemIndex = this.total - 1;
        return lastItemIndex / this.maxResults;
    }
}
#endregion

#region Keyword highlighting

/// <summary>
/// Wraps every occurrence of the given keywords in <paramref name="p_Body"/> with
/// <paramref name="p_Before"/> / <paramref name="p_After"/>.
/// </summary>
/// <param name="p_Body">Text to process. Null or empty is returned unchanged.</param>
/// <param name="p_KeyWords">Whitespace-separated keywords; matching is case-sensitive.</param>
/// <param name="p_Before">Markup inserted before each match.</param>
/// <param name="p_After">Markup inserted after each match.</param>
/// <param name="p_MaxLength">Currently unused (the truncation logic that used it is disabled).</param>
/// <returns>The body with all keyword occurrences wrapped.</returns>
/// <remarks>
/// All keywords are replaced in a single pass, so a keyword can never match inside
/// markup inserted for another keyword (e.g. the keyword "font" with a
/// <c>&lt;font&gt;</c> wrapper). Empty keywords produced by repeated spaces or an
/// empty query are ignored instead of making <c>string.Replace</c> throw.
/// The body is not HTML-encoded here.
/// </remarks>
public string SimpleHighLighter(string p_Body, string p_KeyWords, string p_Before,
    string p_After, int p_MaxLength)
{
    if (string.IsNullOrEmpty(p_Body) || p_KeyWords == null)
    {
        return p_Body;
    }

    string[] keyWords = p_KeyWords.Split(
        new char[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    if (keyWords.Length == 0)
    {
        return p_Body;
    }

    // Longest keyword first so that, for overlapping keywords ("ab" and "a"),
    // the regex alternation prefers the longer match.
    Array.Sort(keyWords, delegate(string left, string right)
    {
        return right.Length.CompareTo(left.Length);
    });

    // Keywords are literal text, not patterns.
    string[] escaped = new string[keyWords.Length];
    for (int i = 0; i < keyWords.Length; i++)
    {
        escaped[i] = Regex.Escape(keyWords[i]);
    }
    string pattern = string.Join("|", escaped);

    // A MatchEvaluator is used (rather than a replacement string) so that '$'
    // characters in p_Before / p_After are not treated as substitution tokens.
    return Regex.Replace(p_Body, pattern, delegate(Match match)
    {
        return p_Before + match.Value + p_After;
    });
}
#endregion

#region Properties
/// <summary>
/// The search keywords; reads and writes the <c>m_Query</c> field.
/// </summary>
public string Query
{
    get { return this.m_Query; }
    set { this.m_Query = value; }
}
#endregion
}
}

using System;
using System.Data;
using System.IO;
using System.Text.RegularExpressions;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Search.Highlight;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using System.Configuration;

namespace So.News
...{
public class NewsSearch : BaseSearch
...{
/// <summary>
/// Initialises the index path from the "NewsIndexPath" application setting.
/// </summary>
public NewsSearch()
{
    string configuredIndexPath = ConfigurationManager.AppSettings["NewsIndexPath"];
    IndexDiectory = configuredIndexPath;
}

#region Run the search and convert the hits into a displayable data source

/// <summary>
/// Does the search and stores the information about the results.
/// </summary>
/// <remarks>
/// Parses <c>Query</c> against the "newsContent" field with a
/// <c>StandardAnalyzer</c>, then fills <c>Results</c> with the rows of the
/// requested page and sets <c>total</c>, <c>startAt</c>, <c>fromItem</c>,
/// <c>toItem</c> and <c>duration</c>.
/// The searcher is closed even when parsing or searching throws, and the method
/// can be called more than once on the same instance (the column schema is
/// created only once and old rows are discarded).
/// NOTE(review): exceptions from the query parser for malformed or empty
/// queries are not handled here and propagate to the caller — confirm that the
/// page deals with them.
/// </remarks>
public void search()
{
    // Markup wrapped around each highlighted keyword in the summary.
    const string highlightOpen = "<font color=\"#C60A00\">";
    const string highlightClose = "</font>";

    // Stopwatch is unaffected by system clock adjustments, unlike DateTime.Now differences.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    this.GetIndexDir("HDC.News", IndexDiectory);
    IndexSearcher searcher = new IndexSearcher(dir);
    try
    {
        Lucene.Net.Analysis.Analyzer analyzer = new StandardAnalyzer();
        QueryParser parser = new QueryParser("newsContent", analyzer);
        Lucene.Net.Search.Query query = parser.Parse(this.Query);

        // Define the result schema once; adding a column twice would throw.
        if (this.Results.Columns.Count == 0)
        {
            this.Results.Columns.Add("ArticleID", typeof(int));
            this.Results.Columns.Add("ArticleClassID", typeof(int));
            this.Results.Columns.Add("className", typeof(string));
            this.Results.Columns.Add("titleImg", typeof(string));
            this.Results.Columns.Add("updateTime", typeof(DateTime));
            this.Results.Columns.Add("source", typeof(string));
            this.Results.Columns.Add("title", typeof(string));
            this.Results.Columns.Add("summary", typeof(string));
        }
        this.Results.Rows.Clear();

        // NOTE(review): SortField.DOC with reverse = true presumably yields
        // descending index order regardless of the field name — confirm.
        Sort sort = new Sort(new SortField("ArticleID", SortField.DOC, true));

        // Hits is Lucene's own result set; it is converted to DataTable rows below.
        Hits hits = searcher.Search(query, sort);
        this.total = hits.Length();

        // Needs 'total', so it must run after the search.
        this.startAt = initStartAt();

        // One page worth of items, or fewer at the end of the result list.
        int pageEnd = smallerOf(total, this.maxResults + this.startAt);

        for (int i = startAt; i < pageEnd; i++)
        {
            Document doc = hits.Doc(i);
            DataRow row = this.Results.NewRow();

            row["ArticleID"] = Convert.ToInt32(doc.Get("ArticleID"));
            row["ArticleClassID"] = Convert.ToInt32(doc.Get("ArticleClassID"));

            // A document without a stored summary must not crash the highlighter.
            string summary = doc.Get("summary") ?? string.Empty;
            row["summary"] = this.SimpleHighLighter(summary, this.Query, highlightOpen, highlightClose, 226);

            row["className"] = doc.Get("className");
            row["titleImg"] = doc.Get("titleImg");
            row["updateTime"] = Convert.ToDateTime(doc.Get("updateTime"));
            row["source"] = doc.Get("source");
            row["title"] = doc.Get("title");
            this.Results.Rows.Add(row);
        }
    }
    finally
    {
        searcher.Close();
    }

    // Result information in user (1-based) format.
    this.fromItem = startAt + 1;
    this.toItem = smallerOf(startAt + maxResults, total);

    this.duration = timer.Elapsed;
}
#endregion

#region Pager links
/// <summary>
/// Builds the pager shown at the bottom of the page as a one-column ("html") table.
/// </summary>
/// <remarks>
/// Rows, in order: previous-page arrow, up to 7 preceding page numbers, the
/// current page (not a link), up to 8 following page numbers, next-page arrow.
/// The arrows are plain images when there is no previous / next page.
/// </remarks>
public DataTable Paging
{
    get
    {
        const int previousPagesCount = 7;
        const int nextPagesCount = 8;

        // Zero-based index of the current page; a startAt that is not a multiple
        // of maxResults is rounded up.
        int pageNumber = (startAt + maxResults - 1) / maxResults;
        string encodedQuery = Server.UrlEncode(this.Query);

        DataTable dt = new DataTable();
        dt.Columns.Add("html", typeof(string));

        // "Previous page" arrow.
        DataRow previousArrow = dt.NewRow();
        if (startAt >= maxResults)
        {
            previousArrow["html"] = "<EM><a href=\"/News/?q=" + encodedQuery + "&start=" + (startAt - maxResults) + "\"><IMG src=\"images/b_pre.gif\"></a></EM>";
        }
        else
        {
            previousArrow["html"] = "<EM><IMG src=\"images/b_pre.gif\"></EM>";
        }
        dt.Rows.Add(previousArrow);

        // Page numbers before the current one, in ascending order.
        for (int i = Math.Max(0, pageNumber - previousPagesCount); i < pageNumber; i++)
        {
            int step = i - pageNumber;
            DataRow r = dt.NewRow();
            r["html"] = pagingItemHtml(startAt + (maxResults * step), i + 1, true);
            dt.Rows.Add(r);
        }

        // Current page, rendered without a link.
        DataRow current = dt.NewRow();
        current["html"] = pagingItemHtml(startAt, pageNumber + 1, false);
        dt.Rows.Add(current);

        // Page numbers after the current one.
        for (int i = pageNumber + 1; i <= pageCount && i <= pageNumber + nextPagesCount; i++)
        {
            int step = i - pageNumber;
            DataRow r = dt.NewRow();
            r["html"] = pagingItemHtml(startAt + (maxResults * step), i + 1, true);
            dt.Rows.Add(r);
        }

        // "Next page" arrow.
        DataRow nextArrow = dt.NewRow();
        if (pageNumber < pageCount)
        {
            nextArrow["html"] = "<EM><a class=\"blue1\" href=\"/News/?q=" + encodedQuery + "&start=" + (startAt + maxResults) + "\"><IMG src=\"images/b_nextpage.gif\"></a></EM>";
        }
        else
        {
            nextArrow["html"] = "<EM><IMG src=\"images/b_nextpage.gif\"></EM>";
        }
        dt.Rows.Add(nextArrow);

        return dt;
    }
}

#region Pager item markup

/// <summary>
/// Renders one page-number entry of the pager.
/// </summary>
/// <param name="start">Value written to the "start" query-string parameter of the link.</param>
/// <param name="number">Page number shown to the user.</param>
/// <param name="active">
/// True to render a clickable link; false to render the number without a link
/// (used for the current page).
/// </param>
/// <returns>The HTML fragment for the entry.</returns>
public string pagingItemHtml(int start, int number, bool active)
{
    if (!active)
    {
        return "<VAR class=on>" + number + "</VAR>";
    }

    // The quotes around the href value must be escaped inside the C# literal.
    return "<VAR><a href=\"/News/?q=" + Server.UrlEncode(this.Query) + "&start=" + start + "\">" + number + "</a></VAR>";
}
#endregion

#endregion

}
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: