您的位置:首页 > 其它

.net lucene 实战搜索(三)----- 基本之搜索

2007-10-11 16:53 337 查看
上一节,主要叙述了索引的构建。这一节主要谈谈资料的搜索。上回说过索引本身也是一个数据库。它也有自己的sql语法。

详细请参阅:http://lucene.apache.org/java/docs/queryparsersyntax.html 这里不一一详细说明。

查询参数类别

/// <summary>
/// Kind of condition that a single query item contributes to a search.
/// The numeric values are spelled out explicitly so they stay stable if
/// members are ever reordered.
/// </summary>
/// <remarks>
/// How each kind is translated into a Lucene query is decided by code that
/// is not part of this file; the member comments below only restate the names.
/// </remarks>
public enum QueryItemType
{
    /// <summary>"And" condition (also the default for a new query item).</summary>
    And = 0,

    /// <summary>"Or" condition.</summary>
    Or = 1,

    /// <summary>Range condition.</summary>
    Range = 2,

    /// <summary>Fuzzy condition.</summary>
    Fuzzy = 3,

    /// <summary>Wildcard condition.</summary>
    Wildcard = 4,

    /// <summary>Time-range condition.</summary>
    TimeRange = 5
}

查询条件参数

/// <summary>
/// One search condition: a field name, one or two values and the kind of
/// condition (<see cref="QueryItemType"/>).
/// </summary>
/// <remarks>
/// Values assigned through <see cref="Value"/> / <see cref="Value_1"/> (and
/// through the constructors, which use those setters) have every Lucene
/// query-syntax special character replaced by a single space, so user input
/// cannot inject query operators.
/// NOTE(review): the sanitising applies to every <see cref="QueryItemType"/>,
/// so "-" inside range/date values and "*" / "?" inside wildcard values are
/// stripped as well — confirm that the query builder does not rely on them.
/// </remarks>
[Serializable]
public class QueryItem
{
    private string fieldText = string.Empty;
    private string fieldName = string.Empty;
    private string value = string.Empty;
    private string value_1 = string.Empty;
    private QueryItemType type = QueryItemType.And;

    // Lucene query-parser special characters. The original list contained " :"
    // (with a leading space), which could never match a bare colon.
    private string[] specialcharacters = new string[] { "+", "-", "&&", "||", "!", "(", ")", "{", "}", "[", "]", "^", "\"", "~", "*", "?", ":", "\\" };

    private bool isToLower = true;
    private bool isTextQuery = false;

    /// <summary>Creates an empty "And" condition.</summary>
    public QueryItem()
    {
    }

    /// <summary>Creates a condition without a field text.</summary>
    /// <param name="sfieldName">Name of the index field to query.</param>
    /// <param name="svalue">First value; sanitised, null becomes empty.</param>
    /// <param name="svalue_1">Second value; sanitised, null becomes empty.</param>
    /// <param name="stype">Kind of condition.</param>
    public QueryItem(string sfieldName, string svalue, string svalue_1, QueryItemType stype)
    {
        fieldName = sfieldName;
        Value = svalue;
        Value_1 = svalue_1;
        type = stype;
    }

    /// <summary>Creates a condition including a field text.</summary>
    /// <param name="ftext">Value stored in <see cref="FieldText"/>.</param>
    /// <param name="sfieldName">Name of the index field to query.</param>
    /// <param name="svalue">First value; sanitised, null becomes empty.</param>
    /// <param name="svalue_1">Second value; sanitised, null becomes empty.</param>
    /// <param name="stype">Kind of condition.</param>
    public QueryItem(string ftext, string sfieldName, string svalue, string svalue_1, QueryItemType stype)
    {
        this.fieldText = ftext;
        fieldName = sfieldName;
        Value = svalue;
        Value_1 = svalue_1;
        type = stype;
    }

    /// <summary>Name of the index field this condition applies to.</summary>
    public string FieldName
    {
        get { return fieldName; }
        set { fieldName = value; }
    }

    /// <summary>
    /// First (or only) value of the condition. The setter stores a sanitised
    /// copy: special characters become spaces and null becomes an empty string.
    /// </summary>
    public string Value
    {
        get { return this.value; }
        set { this.value = Sanitize(value); }
    }

    /// <summary>
    /// Second value of the condition, sanitised exactly like <see cref="Value"/>.
    /// Presumably the upper bound of a range condition — confirm against the
    /// query builder.
    /// </summary>
    public string Value_1
    {
        get { return value_1; }
        set { value_1 = Sanitize(value); }
    }

    /// <summary>Kind of condition.</summary>
    public QueryItemType Type
    {
        get { return type; }
        set { type = value; }
    }

    /// <summary>
    /// Free text attached to the field; not interpreted by this class
    /// (presumably a display label — confirm with callers).
    /// </summary>
    public string FieldText
    {
        get { return fieldText; }
        set { fieldText = value; }
    }

    /// <summary>
    /// Flag defaulting to true; not read by this class (presumably tells the
    /// query builder to lower-case the value — confirm).
    /// </summary>
    public bool IsToLower
    {
        get { return isToLower; }
        set { isToLower = value; }
    }

    /// <summary>
    /// Marks this item as carrying the free-text part of the query.
    /// Defaults to false.
    /// </summary>
    public bool IsTextQuery
    {
        get { return isTextQuery; }
        set { isTextQuery = value; }
    }

    // Replaces every special character with a space. Strings are immutable,
    // so the result of Replace must be assigned back; the original code
    // discarded it and therefore never sanitised anything.
    private string Sanitize(string raw)
    {
        if (raw == null)
        {
            return string.Empty;
        }

        string cleaned = raw;
        foreach (string special in specialcharacters)
        {
            cleaned = cleaned.Replace(special, " ");
        }
        return cleaned;
    }
}

搜索参数

/// <summary>
/// Everything needed to run one search: index location, analyzer, paging,
/// the list of conditions, optional keyword filters and optional sort order.
/// </summary>
[Serializable]
public class SearchParameter
{
    // Symbols and operator words stripped from query values and sort field names.
    string[] stopwords = new string[] {"[","]","+","-","or","and","to",":","~","*",
        "!","@","#","$","%","^","&","*","(",")","|","?","_","\\","//","'","\"",";","{","}","=","<",">"};

    private int _pagesize = 100;
    private int _pageindex = 1;
    private List<QueryItem> queryitem = new List<QueryItem>();
    private Hashtable _keywordfilter = new Hashtable();
    private AnalyzerEnum _analyzertype;
    private string _indexdir;
    private List<SortItem> _sortItems;

    /// <summary>Directory of the index to search.</summary>
    public string IndexDir
    {
        get { return _indexdir; }
        set { _indexdir = value; }
    }

    /// <summary>Number of rows per page; defaults to 100.</summary>
    public int PageSize
    {
        get { return _pagesize; }
        set { _pagesize = value; }
    }

    /// <summary>1-based page number; defaults to 1.</summary>
    public int PageIndex
    {
        get { return _pageindex; }
        set { _pageindex = value; }
    }

    /// <summary>
    /// Conditions of the search. The setter rewrites <c>Value</c> of every
    /// item in place (stop-words and symbols become spaces) and then keeps a
    /// reference to the supplied list. Assigning null yields an empty list
    /// instead of throwing.
    /// </summary>
    public List<QueryItem> QueryItems
    {
        get { return queryitem; }
        set
        {
            if (value == null)
            {
                queryitem = new List<QueryItem>();
                return;
            }

            foreach (QueryItem item in value)
            {
                item.Value = ReplaceStr(item.Value);
            }
            queryitem = value;
        }
    }

    /// <summary>Keyword filters, keyed by query field.</summary>
    public Hashtable KeywordFilter
    {
        get { return _keywordfilter; }
    }

    /// <summary>
    /// Registers a keyword filter. Uses <c>Hashtable.Add</c>, so adding the
    /// same <paramref name="QueryField"/> twice throws ArgumentException.
    /// </summary>
    public void AddKeywordFilter(string QueryField, string QueryText)
    {
        _keywordfilter.Add(QueryField, QueryText);
    }

    /// <summary>Sort order to apply; write-only. Read it back via <see cref="SortFields"/>.</summary>
    public List<SortItem> SortItems
    {
        set { _sortItems = value; }
    }

    /// <summary>
    /// The sort order converted to Lucene sort fields, or null when no sort
    /// items were assigned. Each field name is upper-cased and cleaned with the
    /// same stop-word replacement as query values; <c>SortItem.ASC</c> is
    /// passed as the second constructor argument.
    /// NOTE(review): that argument is presumably Lucene's "reverse" flag, which
    /// would match SortItem's own comment (true = descending) — confirm.
    /// A new array is built on every call.
    /// </summary>
    public SortField[] SortFields
    {
        get
        {
            if (_sortItems == null)
            {
                return null;
            }

            SortField[] result = new SortField[_sortItems.Count];
            for (int i = 0; i < _sortItems.Count; i++)
            {
                string name = ReplaceStr(_sortItems[i].FieldName.ToUpper());
                result[i] = new SortField(name, _sortItems[i].ASC);
            }
            return result;
        }
    }

    /// <summary>Removes the keyword filter for <paramref name="QueryField"/>, if any.</summary>
    public void RemoveKeywordFilter(string QueryField)
    {
        _keywordfilter.Remove(QueryField);
    }

    /// <summary>Analyzer to use for the search.</summary>
    public AnalyzerEnum AnalyzerType
    {
        set { _analyzertype = value; }
        get { return _analyzertype; }
    }

    /// <summary>
    /// Returns the value of the first condition flagged <c>IsTextQuery</c>,
    /// or an empty string when there is none.
    /// </summary>
    public string GetQueryText()
    {
        foreach (QueryItem item in queryitem)
        {
            if (item.IsTextQuery)
            {
                return item.Value;
            }
        }
        return string.Empty;
    }

    // Replaces every stop-word/symbol in 'source' with a single space,
    // processing the entries in the order they are listed.
    // Alphabetic entries ("or", "and", "to") are only removed when they stand
    // alone, i.e. are not glued to another ASCII letter or digit. The original
    // plain substring replace turned "doctor" into "doc r" and "history" into
    // "his ry". Text adjacent to non-ASCII characters (e.g. CJK) is still
    // treated as standing alone, as before.
    private string ReplaceStr(string source)
    {
        if (string.IsNullOrEmpty(source))
        {
            return string.Empty;
        }

        for (int i = 0; i < stopwords.Length; i++)
        {
            string stop = stopwords[i];
            if (IsPlainWord(stop))
            {
                string pattern = "(?<![A-Za-z0-9])"
                    + System.Text.RegularExpressions.Regex.Escape(stop)
                    + "(?![A-Za-z0-9])";
                source = System.Text.RegularExpressions.Regex.Replace(source, pattern, " ");
            }
            else
            {
                source = source.Replace(stop, " ");
            }
        }
        return source;
    }

    // True when 'text' is non-empty and consists of ASCII letters only.
    private static bool IsPlainWord(string text)
    {
        if (text.Length == 0)
        {
            return false;
        }

        foreach (char c in text)
        {
            bool isLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
            if (!isLetter)
            {
                return false;
            }
        }
        return true;
    }
}

/// <summary>
/// One sort criterion: the field to sort by and a direction flag.
/// </summary>
[Serializable]
public class SortItem
{
    /// <summary>Name of the field to sort by; empty by default.</summary>
    public string FieldName = string.Empty;

    /// <summary>
    /// Direction flag, true by default. According to the original author's
    /// comment: true = descending, false = ascending.
    /// NOTE(review): that is the opposite of what the name "ASC" suggests —
    /// confirm against the code that consumes this flag.
    /// </summary>
    public bool ASC = true;

    /// <summary>Creates a criterion with the default field name and flag.</summary>
    public SortItem()
    {
    }

    /// <summary>Creates a criterion for the given field.</summary>
    /// <param name="fieldName">Name of the field to sort by.</param>
    /// <param name="asc">Direction flag; per the original comment, true = descending, false = ascending.</param>
    public SortItem(string fieldName, bool asc)
    {
        this.FieldName = fieldName;
        this.ASC = asc;
    }
}

以上代码说明:查询条件和数据库一样,有 or and not 等条件,QueryItemType就是用来设置条件的类型。

QueryItem 表示单个查询条件,多个 QueryItem 组合起来即可实现多条件查询。SortItem 用于排序,同样支持多字段排序。注意:默认情况下 Lucene 按匹配程度(相关度)从高到低排序。

这里只是对单个索引进行查询,事实上 Lucene 是支持多索引分布式查询的。

//搜索器

/// <summary>
/// Runs queries against a single Lucene index and returns the hits as
/// DataTables, counts or document ids.
/// </summary>
/// <remarks>
/// Conventions shared by the DataTable-returning methods:
/// <list type="bullet">
/// <item>Field names passed in are upper-cased before use.</item>
/// <item>When the query matches nothing the method returns null.</item>
/// <item>When an exception occurs it is written to the trace listeners and the
/// table built so far (possibly empty or partial) is returned; nothing is
/// rethrown. The original code swallowed such errors without any record.</item>
/// <item>Stored field values are inserted into the highlight markup as-is,
/// without HTML encoding.</item>
/// </list>
/// </remarks>
public class Searcher
{
    // Regex replacement pattern for SearchData highlighting; "$0" is the matched term.
    private const string HighlightReplacement = "<font color='red'><b>$0</b></font>";

    /// <summary>
    /// Searches without highlighting; see the four-argument overload.
    /// </summary>
    public DataTable SearchData(SearchParameter parm, string fields, out int count)
    {
        return SearchData(parm, fields, "", out count);
    }

    /// <summary>
    /// Returns one page of hits with the requested columns.
    /// </summary>
    /// <param name="parm">Index directory, conditions, sort order and paging.</param>
    /// <param name="fields">Comma-separated stored field names; one string column is created per name.</param>
    /// <param name="lightfields">Comma-separated field names whose values get the query terms wrapped in red/bold markup; empty or null disables highlighting.</param>
    /// <param name="count">Total number of hits (not just this page); 0 when nothing matched or the search failed early.</param>
    /// <returns>The page as a table, or null when there are no hits.</returns>
    public DataTable SearchData(SearchParameter parm, string fields, string lightfields, out int count)
    {
        count = 0;
        DataTable dt = new DataTable();
        IndexSearcher searcher = null;

        fields = fields.ToUpper();
        lightfields = (lightfields == null) ? string.Empty : lightfields.ToUpper();

        try
        {
            searcher = new IndexSearcher(parm.IndexDir);
            Hits hits = RunQuery(searcher, parm);
            if (hits == null || hits.Length() <= 0) return null;

            count = hits.Length();
            int start;
            int end;
            GetPageBounds(parm, count, out start, out end);

            string[] fieldcolumns = fields.Split(',');
            foreach (string field in fieldcolumns)
            {
                EnsureColumn(dt, field);
            }

            // Build the highlighter once per search instead of once per cell.
            System.Text.RegularExpressions.Regex highlighter = null;
            string[] lightcolumns = lightfields.Split(',');
            if (lightfields.Length > 0)
            {
                string text = parm.GetQueryText();
                if (!string.IsNullOrEmpty(text))
                {
                    highlighter = BuildHighlighter(Tokenize(parm, text));
                }
            }

            for (int i = start; i < end; i++)
            {
                DataRow dr = dt.NewRow();
                foreach (string field in fieldcolumns)
                {
                    // Get returns null when the document has no such stored field.
                    string temp = hits.Doc(i).Get(field);
                    if (temp != null && highlighter != null && Array.IndexOf(lightcolumns, field) >= 0)
                    {
                        temp = highlighter.Replace(temp, HighlightReplacement);
                    }
                    dr[field] = temp;
                }
                dt.Rows.Add(dr);
            }
        }
        catch (Exception err)
        {
            System.Diagnostics.Trace.TraceError("Searcher.SearchData failed: {0}", err);
        }
        finally
        {
            if (searcher != null)
            {
                searcher.Close();
            }
        }
        return dt;
    }

    /// <summary>
    /// Returns one page of hits with every stored field as a column.
    /// </summary>
    /// <param name="parm">Index directory, conditions, sort order and paging.</param>
    /// <param name="lightfield">Single field name to highlight; empty or null disables highlighting. Only the value of the first query item is highlighted (orange-red/bold).</param>
    /// <param name="count">Total number of hits; 0 when nothing matched or the search failed early.</param>
    /// <returns>The page as a table, or null when there are no hits.</returns>
    public DataTable SearchDataAllField(SearchParameter parm, string lightfield, out int count)
    {
        count = 0;
        DataTable dt = new DataTable();
        IndexSearcher searcher = null;

        lightfield = (lightfield == null) ? string.Empty : lightfield.ToUpper();

        try
        {
            searcher = new IndexSearcher(parm.IndexDir);
            Hits hits = RunQuery(searcher, parm);
            if (hits == null || hits.Length() <= 0) return null;

            count = hits.Length();
            int start;
            int end;
            GetPageBounds(parm, count, out start, out end);

            // Seed the columns from the first hit, as before.
            foreach (Field field in hits.Doc(0).Fields())
            {
                EnsureColumn(dt, field.Name());
            }

            // The original loop broke after the first query item, so only that
            // item's value is ever highlighted. An empty value is skipped
            // because string.Replace rejects an empty search string.
            string needle = null;
            string marked = null;
            if (lightfield.Length > 0 && parm.QueryItems != null && parm.QueryItems.Count > 0)
            {
                needle = parm.QueryItems[0].Value;
                if (string.IsNullOrEmpty(needle))
                {
                    needle = null;
                }
                else
                {
                    marked = string.Format("<font color=orangered><b>{0}</b></font>", needle);
                }
            }

            for (int i = start; i < end; i++)
            {
                // Later hits may carry fields the first hit did not have; add
                // their columns before creating the row.
                foreach (Field field in hits.Doc(i).Fields())
                {
                    EnsureColumn(dt, field.Name());
                }

                DataRow dr = dt.NewRow();
                foreach (Field field in hits.Doc(i).Fields())
                {
                    string name = field.Name();
                    string temp = field.StringValue();
                    if (needle != null && temp != null && name == lightfield)
                    {
                        temp = temp.Replace(needle, marked);
                    }
                    dr[name] = temp;
                }
                dt.Rows.Add(dr);
            }
        }
        catch (Exception err)
        {
            System.Diagnostics.Trace.TraceError("Searcher.SearchDataAllField failed: {0}", err);
        }
        finally
        {
            if (searcher != null)
            {
                searcher.Close();
            }
        }
        return dt;
    }

    /// <summary>
    /// Returns up to <paramref name="count"/> distinct, non-blank values of one
    /// stored field, in hit order.
    /// </summary>
    /// <param name="parm">Index directory, conditions and sort order (paging is ignored).</param>
    /// <param name="fieldName">Stored field whose values are collected; also the name of the single column.</param>
    /// <param name="count">Maximum number of rows to return.</param>
    /// <returns>A one-column table, or null when there are no hits.</returns>
    public DataTable SearchDataDistinct(SearchParameter parm, string fieldName, int count)
    {
        DataTable dt = new DataTable();
        IndexSearcher searcher = null;

        fieldName = fieldName.ToUpper();

        try
        {
            searcher = new IndexSearcher(parm.IndexDir);
            Hits hits = RunQuery(searcher, parm);
            if (hits == null || hits.Length() <= 0) return null;

            dt.Columns.Add(new DataColumn(fieldName, typeof(string)));

            // Remember every value already emitted. The original compared only
            // with the previous hit, which is distinct only for sorted results.
            Dictionary<string, bool> seen = new Dictionary<string, bool>();
            int total = hits.Length();
            for (int i = 0; i < total && dt.Rows.Count < count; i++)
            {
                // Get returns null when the document lacks the field.
                string current = hits.Doc(i).Get(fieldName);
                if (current == null || current.Trim().Length <= 0) continue;
                if (seen.ContainsKey(current)) continue;

                seen.Add(current, true);
                DataRow dr = dt.NewRow();
                dr[fieldName] = current;
                dt.Rows.Add(dr);
            }
        }
        catch (Exception err)
        {
            System.Diagnostics.Trace.TraceError("Searcher.SearchDataDistinct failed: {0}", err);
        }
        finally
        {
            if (searcher != null)
            {
                searcher.Close();
            }
        }
        return dt;
    }

    /// <summary>
    /// Returns the number of hits for the query (sort order is not applied),
    /// or 0 when the search fails; the failure is traced.
    /// </summary>
    public int SearchCount(SearchParameter parm)
    {
        IndexSearcher searcher = null;
        try
        {
            searcher = new IndexSearcher(parm.IndexDir);
            Query query = Formater.FormatSearchItem(parm);
            Hits hits = searcher.Search(query);
            return hits.Length();
        }
        catch (Exception err)
        {
            System.Diagnostics.Trace.TraceError("Searcher.SearchCount failed: {0}", err);
            return 0;
        }
        finally
        {
            if (searcher != null)
            {
                searcher.Close();
            }
        }
    }

    /// <summary>
    /// Returns the document id of the first hit. Exceptions propagate to the caller.
    /// </summary>
    /// <returns>
    /// The id of the first hit, or 0 when nothing matched.
    /// NOTE(review): 0 may also be a real document id, in which case callers
    /// cannot tell "not found" from "first document" — confirm.
    /// </returns>
    public int GetDocID(SearchParameter parm)
    {
        IndexSearcher searcher = new IndexSearcher(parm.IndexDir);
        try
        {
            Hits hits = RunQuery(searcher, parm);
            if (hits == null || hits.Length() <= 0) return 0;
            return hits.Id(0);
        }
        finally
        {
            searcher.Close();
        }
    }

    /// <summary>
    /// Returns the document ids of all hits in hit order (empty list when
    /// nothing matched). Exceptions propagate to the caller.
    /// </summary>
    public List<int> GetDocIDs(SearchParameter parm)
    {
        List<int> slist = new List<int>();
        IndexSearcher searcher = new IndexSearcher(parm.IndexDir);
        try
        {
            Hits hits = RunQuery(searcher, parm);
            if (hits == null) return slist;

            int total = hits.Length();
            for (int i = 0; i < total; i++)
            {
                slist.Add(hits.Id(i));
            }
            return slist;
        }
        finally
        {
            searcher.Close();
        }
    }

    // Builds the query from 'parm' and runs it, sorted when sort fields are set.
    private static Hits RunQuery(IndexSearcher searcher, SearchParameter parm)
    {
        Query query = Formater.FormatSearchItem(parm);
        if (parm.SortFields != null)
        {
            Sort sort = new Sort(parm.SortFields);
            return searcher.Search(query, sort);
        }
        return searcher.Search(query);
    }

    // Computes the half-open hit range [start, end) of the requested page,
    // clamped to [0, total]. A PageIndex below 1 gives an empty page instead
    // of negative document indexes.
    private static void GetPageBounds(SearchParameter parm, int total, out int start, out int end)
    {
        start = (parm.PageIndex - 1) * parm.PageSize;
        end = parm.PageIndex * parm.PageSize;
        if (start < 0) start = 0;
        if (end > total) end = total;
    }

    // Adds a string column called 'name' unless the table already has one.
    private static void EnsureColumn(DataTable table, string name)
    {
        if (!table.Columns.Contains(name))
        {
            table.Columns.Add(new DataColumn(name, typeof(string)));
        }
    }

    // Splits 'text' into terms using the analyzer selected in 'parm'.
    private static List<string> Tokenize(SearchParameter parm, string text)
    {
        List<string> terms = new List<string>();
        using (StringReader reader = new StringReader(text))
        {
            TokenStream ts = Formater.GetAnalyzer(parm.AnalyzerType).TokenStream("", reader);
            Token token;
            while ((token = ts.Next()) != null)
            {
                terms.Add(token.TermText());
            }
        }
        return terms;
    }

    // Builds one case-sensitive regex matching any of the terms, or null when
    // there is nothing to match. Highlighting in a single pass keeps later
    // terms (e.g. "b", "red", "font") from matching inside markup inserted
    // for earlier terms, which the original term-by-term Replace allowed.
    private static System.Text.RegularExpressions.Regex BuildHighlighter(List<string> terms)
    {
        List<string> unique = new List<string>();
        foreach (string term in terms)
        {
            if (!string.IsNullOrEmpty(term) && !unique.Contains(term))
            {
                unique.Add(term);
            }
        }
        if (unique.Count == 0)
        {
            return null;
        }

        // Longest first, so the alternation prefers the longer of two terms
        // that start at the same position.
        unique.Sort(delegate(string a, string b) { return b.Length.CompareTo(a.Length); });

        System.Text.StringBuilder pattern = new System.Text.StringBuilder();
        foreach (string term in unique)
        {
            if (pattern.Length > 0)
            {
                pattern.Append('|');
            }
            pattern.Append(System.Text.RegularExpressions.Regex.Escape(term));
        }
        return new System.Text.RegularExpressions.Regex(pattern.ToString());
    }
}

以上是对 Lucene 搜索的封装,可分页查询数据,仅针对单个索引。

Lucene 本身有一个高亮命中词的组件,不知道是不是我比较愚钝,一直没弄明白怎么用,而且它与版本绑定得太紧密,所以我改为先对查询文本分词,再在搜索结果中把这些词替换成带高亮标记的文本。

下面是应用举例:

// Example: fetch the first page (3 rows) of course documents as a DataTable.
SearchParameter sp = new SearchParameter();
sp.AnalyzerType = AnalyzerEnum.SmartSegmentAnalyzer;
sp.IndexDir = UICommon.IndexDir;
sp.PageIndex = 1;
sp.PageSize = 3;

// One condition: artitype == (int)SDataType.Course.
QueryItem item = new QueryItem();
item.FieldName = "artitype";
item.Value = ((int)SDataType.Course).ToString();

// The list has to be created before use; the original snippet called Add on
// an unassigned local, which does not compile.
List<QueryItem> list = new List<QueryItem>();
list.Add(item);
sp.QueryItems = list;

Searcher search = new Searcher();
DataTable mdt;
int ccount; // total number of hits, filled in by SearchData

try
{
    // mdt is null when nothing matches.
    mdt = search.SearchData(sp, "courseid,coursename", out ccount);
}
catch
{
    // Retry against the temporary index if the first search throws.
    sp.IndexDir = UICommon.TempIndexDir;
    mdt = new Searcher().SearchData(sp, "courseid,coursename", out ccount);
}

这里的结果以表(DataTable)的形式返回,而 Lucene 本身是以 Document 的方式返回的。

以上就是 Lucene 的基本使用流程:索引 → 搜索。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: