您的位置:首页 > 其它

BatchFileProcessing(2)--实现之解析文件

2011-04-05 09:41 211 查看
前一篇写到BatchFileProcessing的流程设计,这篇总结下解析文件的实现。

目前我的产品支持用户上传excel,csv,tab delimited等三种文件格式,鉴于office com组件性能太差,所以我们购买了aspose cells组件。下面我们以批量更新商品库存为例,阐述一下解析文件的实现。

当我们从数据库读取到尚未处理的用户上传文件后,我们会将它下载到服务器磁盘上,然后开始解析。

  文件级别的检查下包括以下几项:

  (1)文件是否可以正常打开,如果不能正常打开,记录异常并通知客户。

  (2)文件中的关键列是否有缺失,如果有缺失,记录异常并通知客户。

  (3)文件中的列是否有重复,如果有重复,记录异常并通知客户。

  (4)文件中是否有未在模版中未定义的列,如果有未在模版中未定义的列,记录异常并通知客户。

批量更新商品库存的业务检查下包括以下几项:

(1)商品编号是否提供,如果没有提供,记录异常到数据库。

(2)商品库存是否提供,如果没有提供,使用默认值零;如果提供了,验证是否在有效的范围内,比如0~999999,如果不合法,记录异常到数据库。

用 Aspose Cells组件读取数据:

1 using System;
2 using System.Collections.Generic;
3 using System.Linq;
4 using System.Text;
5 using Aspose.Cells;
6
7 namespace BatchFile.Jobs.FileParsers
8 {
9 public class AsposeCellParser : IExcelParser
10 {
11 private string m_fileName;
12 private string m_fileExt;
13 private FileFormatType m_fileFormatType;
14 private Workbook m_workbook;
15 private Worksheet m_worksheet;
16
17 public AsposeCellParser(string fileName, string fileExt)
18 {
19 m_fileName = fileName;
20 m_fileExt = fileExt;
21 if (FileExtType.IsCSV(m_fileExt))
22 {
23 m_fileFormatType = FileFormatType.CSV;
24 }
25 else if (FileExtType.IsExcel2007(m_fileExt))
26 {
27 m_fileFormatType = FileFormatType.Excel2007Xlsx;
28 }
29 else if (FileExtType.IsExcel2003(m_fileExt))
30 {
31 m_fileFormatType = FileFormatType.Excel2003;
32 }
33 else if (FileExtType.IsTabDelimited(m_fileExt))
34 {
35 m_fileFormatType = FileFormatType.TabDelimited;
36 }
37 else
38 {
39 throw new NotSupportedException(string.Format("File extension({0}) cannot be supported.", fileExt));
40 }
41 }
42
43 #region IExcelParser
44
45 public string ExcelFile
46 {
47 get { return m_fileName; }
48 }
49
50 public int RowCount
51 {
52 get { return m_worksheet.Cells.MaxDataRow; }
53 }
54
55 public int ColumnCount
56 {
57 get { return m_worksheet.Cells.MaxDataColumn; }
58 }
59
60 public object GetCellValue(int row, int col)
61 {
62 return m_worksheet.Cells[row, col].Value;
63 }
64
65 public void ActiveWorksheet(int activeWorksheetIndex)
66 {
67 if (activeWorksheetIndex < 0 || activeWorksheetIndex >= m_workbook.Worksheets.Count)
68 {
69 throw new ArgumentOutOfRangeException("Worksheet index is out of range.");
70 }
71 m_worksheet = m_workbook.Worksheets[activeWorksheetIndex];
72 }
73
74 public void ActiveWorksheet(string activeWorksheetName)
75 {
76 if (string.IsNullOrWhiteSpace(activeWorksheetName))
77 {
78 throw new ArgumentException("Worksheet name cannot be null or empty.");
79 }
80 m_worksheet = m_workbook.Worksheets[activeWorksheetName];
81 }
82
83 public void OpenFile()
84 {
85 m_workbook = new Workbook();
86 m_workbook.ConvertNumericData = false;
87 m_workbook.Open(m_fileName, m_fileFormatType);
88 m_worksheet = m_workbook.Worksheets[0];
89 }
90
91 public void OpenFile(int activeWorksheetIndex)
92 {
93 OpenFile();
94 ActiveWorksheet(activeWorksheetIndex);
95 }
96
97 public void OpenFile(string activeWorksheetName)
98 {
99 OpenFile();
100 ActiveWorksheet(activeWorksheetName);
101 }
102
103 #endregion
104 }
105 }

文件检查与解析

1 public void Process()
2 {
3 m_parser = new AsposeCellParser(m_batchInventoryFile.FileName, m_fileExt);
4
5 //open file
6 try
7 {
8 m_parser.OpenFile();
9 }
10 catch
11 {
12 EmailUtil.Send(m_batchInventoryFile.UserEmail, Consts.Batch_Inventory_File_Can_Not_Open, "File you uploaded cannot be open.");
13 return;
14 }
15
16 //resolve headers
17 var dictMapping = ConfigManager.BatchFileConfig.BatchInventory.InventoryPropertyMappingDict;
18 List<ExcelHeaderModel> headerList = new List<ExcelHeaderModel>();
19 List<string> headerNameList = new List<string>();
20 int rowIndex = 0;
21 int colIndex = 0;
22 for (; colIndex < m_parser.ColumnCount; colIndex++)
23 {
24 string headerName = m_parser.GetCellValue(rowIndex, colIndex).ToString().Trim();
25 ExcelHeaderModel header = new ExcelHeaderModel();
26 header.ColumnIndex = colIndex;
27 header.HeaderName = headerName;
28 header.IsDuplicated = headerNameList.Contains(headerName);
29
30 headerList.Add(header);
31 headerNameList.Add(headerName);
32 }
33
34 //check missing key column or not
35 List<string> lostKeyList = new List<string>();
36 foreach (var key in dictMapping.Keys)
37 {
38 var mapping = dictMapping[key];
39 if (mapping.IsKey
40 && !headerNameList.Contains(mapping.HeaderName))
41 {
42 lostKeyList.Add(mapping.HeaderName);
43 }
44 }
45 if (lostKeyList.Count > 0)
46 {
47 string keys = lostKeyList[0];
48 for (int i = 1; i < lostKeyList.Count; i++)
49 {
50 keys += string.Format(",{0}", lostKeyList[i]);
51 }
52
53 EmailUtil.Send(m_batchInventoryFile.UserEmail,
54 Consts.Batch_Inventory_File_Missing_Key,
55 string.Format("File you uploaded is missing key(s) {0}.", keys));
56 return;
57 }
58
59 //check exist duplicate columns or not
60 var duplicateColumnList = headerList.FindAll(h => h.IsDuplicated);
61 if (duplicateColumnList.Count > 0)
62 {
63 string duplicateColumns = duplicateColumnList[0].HeaderName;
64 for (int i = 1; i < duplicateColumnList.Count; i++)
65 {
66 duplicateColumns += string.Format(",{0}", duplicateColumnList[i].HeaderName);
67 }
68 EmailUtil.Send(m_batchInventoryFile.UserEmail,
69 Consts.Batch_Inventory_File_Having_Duplicated_Column,
70 string.Format("File you uploaded has duplicated column(s) {0}.", duplicateColumns));
71 return;
72 }
73
74 //check exist undefined columns or not
75 var undefinedColumnList = headerList.FindAll(h => !h.IsTemplateDefined);
76 if (undefinedColumnList.Count > 0)
77 {
78 string undefinedColumns = undefinedColumnList[0].HeaderName;
79 for (int i = 1; i < undefinedColumnList.Count; i++)
80 {
81 undefinedColumns += string.Format(",{0}", undefinedColumnList[i].HeaderName);
82 }
83 EmailUtil.Send(m_batchInventoryFile.UserEmail,
84 Consts.Batch_Inventory_File_Having_Undefined_Column,
85 string.Format("File you uploaded has undefined column(s) {0}.", undefinedColumns));
86 return;
87 }
88
89 //mapping header according to configuration
90 foreach (var header in headerList)
91 {
92 if (dictMapping.Keys.Contains(header.HeaderName))
93 {
94 header.IsTemplateDefined = true;
95 var mapping = dictMapping[header.HeaderName];
96 header.IsKey = mapping.IsKey;
97 header.IsTransactional = mapping.IsTransactional;
98 header.PropertyName = mapping.PropertyName;
99 header.DataType = mapping.DataType;
100 header.DefaultValue = mapping.DefaultValue;
101 header.ColumnName = mapping.ColumnName;
102 header.IsSSBNode = mapping.IsSSBNode;
103 }
104 }
105
106 //check and extract business data
107 if (!Directory.Exists(ConfigManager.BatchFileConfig.BatchInventory.File2Dir))
108 {
109 Directory.CreateDirectory(ConfigManager.BatchFileConfig.BatchInventory.File2Dir);
110 }
111 var partitionList = Partitioner.Create(1, m_parser.RowCount + 1);
112 Parallel.ForEach(partitionList, (p, loopState) =>
113 {
114 DataTable dt = CreateDataTable(headerList);
115 for (int i = p.Item1; i < p.Item2; i++)
116 {
117 DataRow dr = dt.NewRow();
118 XElement xNode = new XElement(ConfigManager.BatchFileConfig.BatchInventory.RootPropertyName);
119 List<string> rowErrorList = new List<string>();
120
121 foreach (var header in headerList)
122 {
123 object value = null;
124 string strValue = string.Empty;
125 if (header.IsKey || header.IsTransactional || header.IsSSBNode)
126 {
127 value = m_parser.GetCellValue(i, header.ColumnIndex);
128 strValue = value.ToString().Trim();
129 if (string.IsNullOrEmpty(strValue))
130 {
131 strValue = header.DefaultValue;
132 }
133 //find property's validator and perform validation
134 if (header.IsTemplateDefined)
135 {
136 var property = dictMapping[header.HeaderName];
137 if (!PropertyValidator.Validate(property.Validator, strValue))
138 {
139 rowErrorList.Add(string.Format(property.Validator.Tips, property.PropertyName));
140 }
141 }
142 }
143 if (header.IsTransactional)
144 {
145 dr[header.ColumnName] = value;
146 }
147 if (header.IsSSBNode)
148 {
149 xNode.Add(new XElement(header.PropertyName, new XCData(strValue)));
150 }
151 }
152
153 dr["BatchFileID"] = m_batchInventoryFile.TransactionNumber;
154 dr["RowIndex"] = i.ToString();
155 if (rowErrorList.Count == 0)
156 {
157 string fileName = i.ToString() + ".xml";
158 string filePath = Path.Combine(ConfigManager.BatchFileConfig.BatchInventory.File2Dir, fileName);
159 File.WriteAllText(filePath, xNode.ToString());
160 dr["FileName"] = fileName;
161 dr["CheckResult"] = Consts.Success;
162 }
163 else
164 {
165 dr["CheckResult"] = Consts.Failed;
166 XElement xNodeCheckMemo = new XElement("CheckMemoList", from error in rowErrorList
167 select new XElement("CheckMemo", error));
168 dr["CheckMemo"] =xNodeCheckMemo.ToString();
169 }
170 dr["HasCheck"] = Consts.Yes;
171 dt.Rows.Add(dr);
172
173 if (dt.Rows.Count == ConfigManager.BatchFileConfig.BatchInventory.BatchSize)
174 {
175 //write to DB
176 SQLHelper.BulkCopy(dt);
177 dt.Rows.Clear();
178 }
179 }
180
181 //process the last batch
182 if (dt.Rows.Count > 0)
183 {
184 //write to DB
185 SQLHelper.BulkCopy(dt);
186 dt.Rows.Clear();
187 }
188 });
189 }
190
191 private DataTable CreateDataTable(List<ExcelHeaderModel> headerList)
192 {
193 DataTable dt = new DataTable(ConfigManager.BatchFileConfig.BatchInventory.DataTableName);
194 foreach (var header in headerList)
195 {
196 if (header.IsTransactional)
197 {
198 dt.Columns.Add(header.ColumnName, Type.GetType(header.DataType));
199 }
200 }
201 dt.Columns.Add("BatchFileID", typeof(int));
202 dt.Columns.Add("RowIndex", typeof(int));
203 dt.Columns.Add("FileName", typeof(string));
204 dt.Columns.Add("HasCheck", typeof(string));
205 dt.Columns.Add("CheckResult", typeof(string));
206 dt.Columns.Add("CheckMemo", typeof(string));
207
208 return dt;
209 }

为了提高文件解析的性能使用了多线程以及SqlBulkCopy等技术。下面是SQLHelper的定义:

1 public static class SQLHelper
2 {
3 private static string m_connectionString = ConfigurationManager.ConnectionStrings["MSSQL"].ConnectionString;
4 public static string ConnectionString
5 {
6 get
7 {
8 return m_connectionString;
9 }
10 }
11
12 public static void BulkCopy(DataTable dt, string destTableName, List<SqlBulkCopyColumnMapping> columnMappingList, int timeoutSeconds = 1200)
13 {
14 using (SqlConnection conn = new SqlConnection(m_connectionString))
15 {
16 conn.Open();
17 SqlTransaction trans = conn.BeginTransaction(IsolationLevel.ReadCommitted, "Bulk copy batch data");
18 SqlBulkCopy b = new SqlBulkCopy(conn, SqlBulkCopyOptions.Default, trans);
19 b.BulkCopyTimeout = timeoutSeconds;
20 b.DestinationTableName = destTableName;
21 if (columnMappingList != null)
22 {
23 foreach (var colMapping in columnMappingList)
24 {
25 b.ColumnMappings.Add(colMapping);
26 }
27 }
28 b.WriteToServer(dt);
29 trans.Commit();
30 b.Close();
31 conn.Close();
32 }
33 }
34
35 public static void BulkCopy(DataTable dt)
36 {
37 List<SqlBulkCopyColumnMapping> columnMappingList = new List<SqlBulkCopyColumnMapping>();
38 foreach (DataColumn col in dt.Columns)
39 {
40 columnMappingList.Add(new SqlBulkCopyColumnMapping
41 {
42 SourceColumn = col.ColumnName,
43 DestinationColumn = col.ColumnName
44 });
45 }
46 BulkCopy(dt, dt.TableName, columnMappingList);
47 }
48 }
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: