您的位置:首页 > 其它

orchard之lucene.net索引生成

2012-06-07 20:28 246 查看
Orchard 是微软自己的团队开发的 CMS。园子里已有很多园友对其系统结构做过详细分析,但其中的某些模块还没有被逐一分析。因为需要使用 Lucene.Net 做站内搜索,所以在这里参考学习一下。顺便提一句:Lucene.Net 已经更新到 2.9.4 版本了,虽然项目还在孵化器中,但仍在持续更新,不容易啊。

点开Modules开启lucene相关应用,如下图。

View Code



/// <summary>
/// Indexes one batch of content items. In rebuild mode the batch is read from the
/// published content versions; in update mode it is read from the pending indexing
/// tasks. The indexing progress is then written to the settings file and the
/// collected additions and deletions are sent to the index provider.
/// </summary>
/// <param name="indexName">Name of the index handed to the index provider.</param>
/// <param name="settingsFilename">File (in the app data folder) that receives the serialized settings.</param>
/// <param name="indexSettings">Current indexing state; its mode and progress markers are updated in place.</param>
/// <returns>
/// <c>true</c> if this batch produced at least one document to store or delete,
/// meaning there may be more items to process; otherwise, <c>false</c>.
/// </returns>
private bool BatchIndex(string indexName, string settingsFilename, IndexSettings indexSettings) {
    var addToIndex = new List<IDocumentIndex>();
    var deleteFromIndex = new List<int>();

    // Rebuilding the index ?
    if (indexSettings.Mode == IndexingMode.Rebuild) {
        Logger.Information("Rebuilding index");
        _indexingStatus = IndexingStatus.Rebuilding;

        // load the next batch of published versions, resuming after the last version id already handled
        var contentItems = _contentRepository
            .Fetch(
                versionRecord => versionRecord.Published && versionRecord.Id > indexSettings.LastContentId,
                order => order.Asc(versionRecord => versionRecord.Id))
            .Take(ContentItemsPerLoop)
            .Select(versionRecord => _contentManager.Get(versionRecord.ContentItemRecord.Id, VersionOptions.VersionRecord(versionRecord.Id)))
            .Distinct()
            .ToList();

        // if no more elements to index, switch to update mode
        // (the update branch below then runs within this same call)
        if (contentItems.Count == 0) {
            indexSettings.Mode = IndexingMode.Update;
        }

        foreach (var item in contentItems) {
            try {
                IDocumentIndex documentIndex = ExtractDocumentIndex(item);

                if (documentIndex != null && documentIndex.IsDirty) {
                    addToIndex.Add(documentIndex);
                }

                // remember how far the rebuild got so the next batch resumes after this version
                indexSettings.LastContentId = item.VersionRecord.Id;
            }
            catch (Exception ex) {
                // best effort: a failing item is logged and the batch continues
                Logger.Warning(ex, "Unable to index content item #{0} during rebuild", item.Id);
            }
        }
    }

    if (indexSettings.Mode == IndexingMode.Update) {
        Logger.Information("Updating index");
        _indexingStatus = IndexingStatus.Updating;

        // Take the next batch of tasks in id order and collapse them to one entry per
        // content item, keyed by the highest task id seen for that item.
        var indexingTasks = _taskRepository
            .Fetch(x => x.Id > indexSettings.LastIndexedId)
            .OrderBy(x => x.Id)
            .Take(ContentItemsPerLoop)
            .GroupBy(x => x.ContentItemRecord.Id)
            .Select(group => new {
                TaskId = group.Max(task => task.Id),
                // NOTE(review): presumably Last() is the newest task of the group thanks to the ordering above - confirm
                Delete = group.Last().Action == IndexingTaskRecord.Delete,
                Id = group.Key,
                ContentItem = _contentManager.Get(group.Key, VersionOptions.Published)
            })
            .OrderBy(x => x.TaskId)
            .ToArray();

        foreach (var item in indexingTasks) {
            try {
                // item.ContentItem can be null if the content item has been deleted
                IDocumentIndex documentIndex = ExtractDocumentIndex(item.ContentItem);

                if (documentIndex == null || item.Delete) {
                    deleteFromIndex.Add(item.Id);
                }
                else if (documentIndex.IsDirty) {
                    addToIndex.Add(documentIndex);
                }

                indexSettings.LastIndexedId = item.TaskId;
            }
            catch (Exception ex) {
                // best effort: a failing task is logged and the batch continues
                Logger.Warning(ex, "Unable to index content item #{0} during update", item.Id);
            }
        }
    }

    // save current state of the index
    indexSettings.LastIndexedUtc = _clock.UtcNow;
    _appDataFolder.CreateFile(settingsFilename, indexSettings.ToXml());

    if (deleteFromIndex.Count == 0 && addToIndex.Count == 0) {
        // nothing more to do
        _indexingStatus = IndexingStatus.Idle;
        return false;
    }

    // save new and updated documents to the index
    try {
        if (addToIndex.Count > 0) {
            _indexProvider.Store(indexName, addToIndex);
            Logger.Information("Added content items to index: {0}", addToIndex.Count);
        }
    }
    catch (Exception ex) {
        Logger.Warning(ex, "An error occured while adding a document to the index");
    }

    // removing documents from the index
    try {
        if (deleteFromIndex.Count > 0) {
            _indexProvider.Delete(indexName, deleteFromIndex);
            // report the deletions (this line previously repeated the "Added" message and count)
            Logger.Information("Removed content items from index: {0}", deleteFromIndex.Count);
        }
    }
    catch (Exception ex) {
        Logger.Warning(ex, "An error occured while removing a document from the index");
    }

    return true;
}




其中重要的一点是:从 Task 中取出索引任务,然后把对应的内容项转换成文档并添加到 Lucene 索引中,代码如下:



// Next batch of indexing tasks after the last processed task id, in ascending id order
var pendingTasks = _taskRepository
    .Fetch(task => task.Id > indexSettings.LastIndexedId)
    .OrderBy(task => task.Id)
    .Take(ContentItemsPerLoop);

// One entry per content item, ordered by the highest task id recorded for that item
var indexingTasks = pendingTasks
    .GroupBy(task => task.ContentItemRecord.Id)
    .Select(tasks => new {
        TaskId = tasks.Max(t => t.Id),
        Delete = tasks.Last().Action == IndexingTaskRecord.Delete,
        Id = tasks.Key,
        ContentItem = _contentManager.Get(tasks.Key, VersionOptions.Published)
    })
    .OrderBy(entry => entry.TaskId)
    .ToArray();

foreach (var entry in indexingTasks) {
    try {
        // entry.ContentItem can be null if the content item has been deleted
        IDocumentIndex extracted = ExtractDocumentIndex(entry.ContentItem);
        bool mustDelete = extracted == null || entry.Delete;

        if (mustDelete) {
            deleteFromIndex.Add(entry.Id);
        }
        else if (extracted.IsDirty) {
            addToIndex.Add(extracted);
        }

        // record progress so the next batch starts after this task
        indexSettings.LastIndexedId = entry.TaskId;
    }
    catch (Exception ex) {
        Logger.Warning(ex, "Unable to index content item #{0} during update", entry.Id);
    }
}




文档处理完之后,将文档存储到索引的代码如下:



// save new and updated documents to the index
// (best effort: a failure is logged as a warning and not rethrown)
try {
if (addToIndex.Count > 0) {
// store the collected documents in the index
_indexProvider.Store(indexName, addToIndex);
Logger.Information("Added content items to index: {0}", addToIndex.Count);
}
}
catch (Exception ex) {
Logger.Warning(ex, "An error occured while adding a document to the index");
}




最终的索引存储处理是在 Lucene.Services.LuceneIndexProvider 中完成的,代码如下:



/// <summary>
/// Writes the given documents to the named index. Any document previously stored
/// for the same content items is deleted first, so each content item ends up with
/// a single, current document.
/// </summary>
/// <param name="indexName">Name of the index to write to.</param>
/// <param name="indexDocuments">Documents to store; when the sequence is empty nothing is done.</param>
public void Store(string indexName, IEnumerable<LuceneDocumentIndex> indexDocuments) {
    // Materialize once: the sequence is used for the emptiness check, the delete
    // pass and the add pass, and a lazy source would otherwise be re-evaluated each time.
    var documents = indexDocuments.ToList();
    if (documents.Count == 0) {
        return;
    }

    // Remove any previous document for these content items
    Delete(indexName, documents.Select(i => i.ContentItemId));

    var writer = new IndexWriter(GetDirectory(indexName), _analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED);
    LuceneDocumentIndex current = null;

    try {
        foreach (var indexDocument in documents) {
            // tracked so the error log below can name the document that failed
            current = indexDocument;

            // convert the custom index document into a Lucene document
            var doc = CreateDocument(indexDocument);

            writer.AddDocument(doc);
            Logger.Debug("Document [{0}] indexed", indexDocument.ContentItemId);
        }
    }
    catch (Exception ex) {
        // best effort: the failure is logged and not rethrown; documents added before it are kept
        Logger.Error(ex, "An unexpected error occured while add the document [{0}] from the index [{1}].", current.ContentItemId, indexName);
    }
    finally {
        // Close must run even if Optimize throws, otherwise the writer is never released.
        try {
            writer.Optimize();
        }
        finally {
            writer.Close();
        }
    }
}




至此lucene的索引算是创建完毕,但是中间的一系列消息和任务之间的传递细节还需要进一步深入学习,错误之处希望园友们能够给予指正。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: