标签:
我们先来回忆回忆上篇文章讲到的通过xml配置文件实现excel批量模板解析的整体思路:
1.对每个excel模板制定xml配置规则集,实现xml配置文件的解析服务
2.为每个excel模板制定DTO,继承于一个BaseDTO
3.实现两个工厂方法,一是获取某excel模板xml配置文件路径;一是获取某excel模板DTO对象
4.EXCEL工具类对外暴露两个接口,一是数据验证接口,返回整个excel所有错误单元格信息;一是数据解析接口,读取单元格数据到DTO。通过构造方法传入配置文件
5.整体流程:用户上传excel文件,首先存储到临时文件夹,其次获取xml配置文件路径,获取DTO对象,然后初始化excel工具类,再验证excel单元格数据,解析数据返回DTO,最后调用入库服务实现DTO数据入库
接下来我们就来看看excel工具类的具体实现代码
1 public class ExcelImportService : ExcelAnalyzeService, IExcelImportService 2 { 3 private string _filePath; 4 private string _xmlPath; 5 private Dictionary<int, int> _rowCount = new Dictionary<int, int>(); 6 private List<Regular> _list;// 规则集 7 8 /// <summary> 9 /// 构造方法 10 /// </summary> 11 /// <param name="filePath">excel文件路径</param> 12 /// <param name="xmlPath">配置文件路径</param> 13 public ExcelImportService(string filePath, string xmlPath) 14 { 15 _filePath = filePath; 16 _xmlPath = xmlPath; 17 _list = this.GetXMLInfo(_xmlPath); 18 } 19
// excel所有单元格数据验证 20 public UploadExcelFileResult ValidateExcel() 21 { 22 var result = new UploadExcelFileResult(); 23 result.Success = true; 24 25 _rowCount = new Dictionary<int, int>(); 26 27 Stream fileStream = new FileStream(_filePath, FileMode.Open); 28 int edition = this.GetExcelEdition(_filePath); 29 if (edition != 0) 30 { 31 IWorkbook workbook = this.CreateWorkBook(edition, fileStream); 32 int sheetCount = _list.Find(e => e.HeaderRegular != null).HeaderRegular["sheetCount"]; 33 34 for (int i = 0; i < sheetCount; i++) 35 { 36 ISheet sheet = workbook.GetSheetAt(i); 37 Dictionary<int, string> dict = this.GetExcelHeaders(sheet, ref result, _list); 38 if (result.Success) 39 { 40 _rowCount.Add(i, sheet.LastRowNum); 41 result = this.CheckExcelDatasEnableNull(sheet, _list, dict, _rowCount[i]); 42 } 43 else 44 { 45 break; 46 } 47 } 48 } 49 else 50 { 51 result.Success = false; 52 result.Message = "文件类型错误!"; 53 } 54 55 fileStream.Close(); 56 return result; 57 } 58
// 解析excel数据到DTO 59 public List<TableDTO> Import<TableDTO>() 60 { 61 var uploadExcelFileResult = new UploadExcelFileResult(); 62 var resultList = new List<TableDTO>(); 63 64 Stream fileStream = new FileStream(_filePath, FileMode.Open); 65 int edition = this.GetExcelEdition(_filePath); 66 IWorkbook workbook = this.CreateWorkBook(edition, fileStream); 67 int sheetCount = _list.Find(e => e.HeaderRegular != null).HeaderRegular["sheetCount"]; 68 69 for (int i = 0; i < sheetCount; i++) 70 { 71 ISheet sheet = workbook.GetSheetAt(i); 72 string sheetName = sheet.SheetName; 73 Dictionary<int, string> dict = this.GetExcelHeaders(sheet, ref uploadExcelFileResult, _list); 74 var sheetLists = this.GetExcelDatas<TableDTO>(sheet, sheetName, _list, dict, _rowCount[i]); 75 resultList.AddRange(sheetLists); 76 } 77 78 fileStream.Close(); 79 return resultList; 80 } 81 }
1.我们看到17行用到了GetXMLInfo()方法,就是第二篇文章中说到的XML文件解析方法,返回该excel的规则集
2.第28行GetExcelEdition()方法,是基础解析接口IExcelAnalyzeService的方法,验证返回excel版本
1 public int GetExcelEdition(string fileName) 2 { 3 var edition = 0; 4 string[] items = fileName.Split(new char[] { ‘.‘ }); 5 int count = items.Length; 6 switch (items[count - 1]) 7 { 8 case "xls": 9 edition = 3; 10 break; 11 case "xlsx": 12 edition = 7; 13 break; 14 default: 15 break; 16 } 17 18 return edition; 19 }
3.第31行CreateWorkBook()方法,是基础解析接口IExcelAnalyzeService的方法,返回excel工作簿对象
1 public IWorkbook CreateWorkBook(int edition, Stream excelFileStream) 2 { 3 switch (edition) 4 { 5 case 7: 6 return new XSSFWorkbook(excelFileStream); 7 case 3: 8 return new HSSFWorkbook(excelFileStream); 9 default: 10 return null; 11 } 12 }
4.第32行是读取配置文件中excel中sheet个数(例如员工模板:我们支持一个excel文件多个sheet表单,可以是每个表单代表一个地区等等)
5.第37行中GetExcelHeaders()方法,是基础解析接口IExcelAnalyzeService的方法,验证返回excel表头数据
1 public Dictionary<int, string> GetExcelHeaders(ISheet sheet, ref UploadExcelFileResult uploadExcelFileResult, 2 List<Regular> list) 3 { 4 int firstHeaderRowIndex = list.Find(e => e.HeaderRegular != null).HeaderRegular["firstHeaderRow"]; 5 int lastHeaderRowIndex = list.Find(e => e.HeaderRegular != null).HeaderRegular["lastHeaderRow"]; 6 7 var dict = new Dictionary<int, string>(); 8 9 try 10 { 11 // 循环获得表头 12 for (int i = firstHeaderRowIndex - 1; i < lastHeaderRowIndex; i++) 13 { 14 IRow headerRow = sheet.GetRow(i); 15 int cellCount = headerRow.LastCellNum; 16 17 for (int j = headerRow.FirstCellNum; j < cellCount; j++) 18 { 19 if (!string.IsNullOrEmpty(headerRow.GetCell(j).StringCellValue.Trim())) 20 { 21 // 根据 键-值 是否已存在做不同处理
//TODO 代码待重构!!! 22 try 23 { 24 string oldValue = dict[j]; 25 dict.Remove(j); 26 dict.Add(j, oldValue + headerRow.GetCell(j).StringCellValue.Trim()); 27 } 28 catch (Exception) 29 { 30 dict.Add(j, headerRow.GetCell(j).StringCellValue.Trim()); 31 } 32 } 33 } 34 } 35 // 遍历表头字典,消除空格 36 for (int i = 0; i < dict.Count; i++) 37 { 38 var value = dict[i]; 39 this.ReplaceSpace(ref value); 40 dict[i] = value; 41 } 42 // 检查表头模板是否被修改 43 for (int count = 0; count < dict.Count; count++) 44 { 45 Regular header = list.Find(h => h.HeaderText == dict[count]); 46 47 if (header == null) 48 { 49 uploadExcelFileResult.Success = false; 50 uploadExcelFileResult.Message = "读取EXCEL表头模板时发生错误,可能造成原因是:EXCEL模板被修改!请下载最新EXCEL模板!"; 51 } 52 } 53 } 54 catch (Exception e) 55 { 56 uploadExcelFileResult.Success = false; 57 uploadExcelFileResult.Message = "读取EXCEL表头模板时发生错误,可能造成原因是:EXCEL模板被修改!请下载最新EXCEL模板!"; 58 } 59 60 return dict; 61 }
其中39行ReplaceSpace()是消除字符串中空格方法(所有半角、全角)。一直想通过正则表达式来做,但是没学到家,还没写好能够做到的正则表达式,所以写的有点复杂,若果谁有这样的正则表达式,请指点一二,感激不尽!!!
1 // 去除空值 2 public void ReplaceSpace(ref string cellValue) 3 { 4 cellValue = TruncateString(cellValue, new char[] { ‘ ‘ }, new char[] { ‘ ‘ }); 5 } 6 7 // 对字符串做空格剔除处理 8 private string TruncateString(string originalWord, char[] spiltWord1, char[] spiltWord2) 9 { 10 var result = ""; 11 var valueReplaceDbcCase = originalWord.Split(spiltWord1); 12 13 if (valueReplaceDbcCase.Count() > 1) 14 { 15 for (int i = 0; i < valueReplaceDbcCase.Count(); i++) 16 { 17 if (valueReplaceDbcCase[i] != "" && valueReplaceDbcCase[i] != " " && 18 valueReplaceDbcCase[i] != " ") 19 { 20 result += TruncateString(valueReplaceDbcCase[i], spiltWord2, new char[0]); 21 } 22 } 23 } 24 else 25 { 26 if (spiltWord2.Any()) 27 { 28 result = TruncateString(originalWord, spiltWord2, new char[0]); 29 } 30 else 31 { 32 result = originalWord; 33 } 34 } 35 36 return result; 37 }
6.第41行CheckExcelDatasEnableNull()方法,是基础解析接口IExcelAnalyzeService的方法,返回excel数据验证结果
1 public UploadExcelFileResult CheckExcelDatasEnableNull(ISheet sheet, List<Regular> list, Dictionary<int, string> dict, int rowCount) 2 { 3 var result = new UploadExcelFileResult(); 4 result.Success = true; 5 6 // 记录单个sheet所有错误信息 7 var sheetErrors = new List<ExcelFileErrorPosition>(); 8 // 表头结束行 9 int lastHeaderRowIndex = list.Find(e => e.HeaderRegular != null).HeaderRegular["lastHeaderRow"]; 10 11 // 循环行数据 12 for (int i = lastHeaderRowIndex; i <= rowCount; i++) 13 { 14 // 标注该行是否出错 15 bool isrowfalse = false; 16 // 记录该行数据临时对象 17 var rowDatas = new List<string>(); 18 // 记录该行错误列 19 var rowErrorCell = new List<int>(); 20 // 记录该行错误列具体错误信息 21 var rowErrorMessages = new List<string>(); 22 // 记录该行空值数 23 int nullcount = 0; 24 25 26 IRow dataRow = sheet.GetRow(i); 27 int cellCount = dict.Count; 28 29 // 循环列数据 30 for (int j = dataRow.FirstCellNum; j < cellCount; j++) 31 { 32 string value = ""; 33 Regular header = list.Find(h => h.HeaderText == dict[j]); 34 //value = dataRow.GetCell(j).ToString(); 35 switch (dataRow.GetCell(j).CellType) 36 { 37 case CellType.Formula: 38 value = dataRow.GetCell(j).StringCellValue.ToString(); 39 break; 40 default: 41 value = dataRow.GetCell(j).ToString(); 42 break; 43 } 44 45 // 记录可能出错数据 46 rowDatas.Add(value); 47 48 // 检查空值 49 if (!this.CheckNull(value, ref nullcount)) 50 { 51 // 检查类型 52 if (!this.CheckDataType(header.DataType, value)) 53 { 54 isrowfalse = true; 55 result.Success = false; 56 // 记录该行错误信息 57 rowErrorCell.Add(j + 1); 58 rowErrorMessages.Add("读取EXCEL数据时发生数据格式错误,请检查该行该列数据格式!"); 59 } 60 else 61 { 62 if (header.DataType == "System.string" || header.DataType == "System.String") 63 { 64 this.ReplaceSpace(ref value); 65 } 66 } 67 } 68 } 69 // 报错处理(空行不报错) 70 if (isrowfalse && nullcount < cellCount) 71 { 72 sheetErrors.Add(new ExcelFileErrorPosition 73 { 74 RowContent = rowDatas, 75 RowIndex = i + 1, 76 CellIndex = rowErrorCell, 77 ErrorMessage = rowErrorMessages 78 }); 79 } 80 } 81 result.ExcelFileErrorPositions = sheetErrors; 82 return result; 83 }
CheckNull()检查空值,是空值则nullcount++;
7.第74行GetExcelDatas()方法,是基础解析接口IExcelAnalyzeService的方法,返回excel数据解析结果
1 public List<TableDTO> GetExcelDatas<TableDTO>(ISheet sheet, string sheetName, List<Regular> list, 2 Dictionary<int, string> dict, int rowCount) 3 { 4 // 返回数据对象集合 5 var resultList = new List<TableDTO>(); 6 // 表头结束行 7 int lastHeaderRowIndex = list.Find(e => e.HeaderRegular != null).HeaderRegular["lastHeaderRow"]; 8 9 // 循环行数据 10 for (int i = lastHeaderRowIndex; i <= rowCount; i++) 11 { 12 // 产生一个新的泛型对象 13 var model = Activator.CreateInstance<TableDTO>(); 14 // 记录该行空值数 15 int nullcount = 0; 16 17 IRow dataRow = sheet.GetRow(i); 18 int cellCount = dict.Count; 19 20 if (dataRow != null) 21 { 22 // 循环列数据 23 for (int j = dataRow.FirstCellNum; j < cellCount; j++) 24 { 25 string value = ""; 26 Regular header = list.Find(h => h.HeaderText == dict[j]); 27 PropertyInfo prop = model.GetType().GetProperty(header.PropertyName); 28 //value = dataRow.GetCell(j).ToString(); 29 switch (dataRow.GetCell(j).CellType) 30 { 31 case CellType.Formula: 32 value = dataRow.GetCell(j).StringCellValue.ToString(); 33 break; 34 default: 35 value = dataRow.GetCell(j).ToString(); 36 break; 37 } 38 39 // 去除空值 40 this.ReplaceSpace(ref value); 41 42 if (value == "") 43 { 44 nullcount++; 45 } 46 47 // 赋值 48 switch (header.DataType) 49 { 50 case "System.double": 51 double valueDecimal; 52 if (double.TryParse(value, out valueDecimal)) 53 { 54 prop.SetValue(model, valueDecimal, null); 55 } 56 break; 57 case "System.Int16": 58 short valueInt16; 59 if (Int16.TryParse(value, out valueInt16)) 60 { 61 prop.SetValue(model, valueInt16, null); 62 } 63 break; 64 case "System.Int32": 65 int valueInt32; 66 if (Int32.TryParse(value, out valueInt32)) 67 { 68 prop.SetValue(model, valueInt32, null); 69 } 70 break; 71 case "System.Boolean": 72 bool valueBoolean; 73 if (Boolean.TryParse(value, out valueBoolean)) 74 { 75 prop.SetValue(model, valueBoolean, null); 76 } 77 break; 78 case "System.DateTime": 79 DateTime valueDateTime; 80 if (DateTime.TryParse(value, out valueDateTime)) 81 { 82 prop.SetValue(model, valueDateTime, null); 83 } 84 break; 85 default: 86 prop.SetValue(model, value, null); 87 break; 88 } 89 } 90 } 91 92 // 添加非空行数据到DTO 93 if (nullcount < cellCount) 94 { 95 resultList.Add(model); 96 } 97 } 98 99 return resultList; 100 }
OK,整体流程中所有代码都贴出来了,写得比较匆忙,如有不当的地方,请大家不吝赐教~~~
附:
博主的需求中还有非单行的复杂表头,涉及到合并单元格表头,甚至左表头等等复杂excel模板,如:
......
这部分excel的解析简直就是非人类的需求,如有需要,会在后续博文继续贴出相关代码,请多多支持....
原创文章,代码都是从自己项目里贴出来的。转载请注明出处哦,亲~~~
NPOI操作EXCEL(三)——反射机制进行excel表格数据的解析
标签:
原文地址:http://www.cnblogs.com/csqb-511612371/p/4891492.html