标签:getc add puts column min out form model pre
上一篇文章提过,HSSF的用户模式会导致读取海量数据时很慢,所以我们采用的是事件驱动模式。这个模式类似于xml的sax解析。需要实现一个接口,HSSFListener接口。
原理:根据excel底层存储(07以版本采用xml存储,以下版本采用二进制)标签决定事件出发点。
目标:在解析完一行(row)数据时进行触发。
优点:读取大数据时,不会导致内存溢出
缺点:用户在解析数据时比较困难。读取数据时速度不快的,因为读取数据的同时根据每个标签进行事件触发。
HSSF事件驱动读取文件的封装类解析。
主要用到两个类(一个是抽象类,一个是工具类)和一个接口:
第一个:工具类:HxlsRead.java:
package yycg.util; import java.io.FileNotFoundException; import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; public class HxlsRead extends HxlsAbstract{ //数据处理解析数据的接口 private HxlsOptRowsInterface hxlsOptRowsInterface; //处理数据总数 private int optRows_sum = 0; //处理数据成功数量 private int optRows_success = 0; //处理数据失败数量 private int optRows_failure = 0; //excel表格每列标题 private List<String> rowtitle ; //失败数据 private List<List<String>> failrows; //失败原因 private List<String> failmsgs ; //要处理数据所在的sheet索引,sheet索引从0开始 private int sheetIndex; /** * 导入文件的名称 * @param filename 导入文件的物理路径 * @param sheetIndex 要读取数据所在sheet序号 * @param hxlsOptRowsInterface 处理读取每一行数据的接口 * @throws IOException * @throws FileNotFoundException * @throws SQLException */ public HxlsRead(String filename,int sheetIndex,HxlsOptRowsInterface hxlsOptRowsInterface) throws IOException, FileNotFoundException, SQLException { super(filename); this.sheetIndex = sheetIndex; this.hxlsOptRowsInterface = hxlsOptRowsInterface; this.rowtitle = new ArrayList<String>(); this.failrows = new ArrayList<List<String>>(); this.failmsgs = new ArrayList<String>(); } /** * 对读取到一行数据进行解析 */ @Override public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws Exception { /*for (int i = 0 ;i< rowlist.size();i++){ System.out.print("‘"+rowlist.get(i)+"‘,"); } System.out.println();*/ //将rowlist的长度补齐和标题一致 int k=rowtitle.size()-rowlist.size(); for(int i=0;i<k;i++){ rowlist.add(null); } if(sheetIndex == this.sheetIndex){ optRows_sum++; if(curRow == 0){//记录标题 rowtitle.addAll(rowlist); }else{ //接口返回的结果是导入数据的结果,有成功,有失败 String result = hxlsOptRowsInterface.optRows(sheetIndex, curRow, rowlist); if(result!=null && !result.equals(hxlsOptRowsInterface.SUCCESS)){ optRows_failure++;//失败统计数加1 //失败数据列表 failrows.add(new ArrayList<String>(rowlist)); failmsgs.add(result); }else{ optRows_success++; } } } } public long getOptRows_sum() { return optRows_sum; } public void setOptRows_sum(int optRows_sum) { this.optRows_sum = optRows_sum; } public long getOptRows_success() { return optRows_success; } public void setOptRows_success(int optRows_success) { this.optRows_success = optRows_success; } public long getOptRows_failure() { return optRows_failure; } public void setOptRows_failure(int optRows_failure) { this.optRows_failure = optRows_failure; } public List<String> getRowtitle() { return rowtitle; } public List<List<String>> getFailrows() { return failrows; } public List<String> getFailmsgs() { return failmsgs; } public void setFailmsgs(List<String> failmsgs) { this.failmsgs = failmsgs; } public static void main(String[] args){ HxlsRead xls2csv; try { //第一个参数就是导入的文件 //第二个参数就是导入文件中哪个sheet //第三个参数导入接口的实现类对象 xls2csv = new HxlsRead("d:/test11.xls",0,new HxlsOptRowsInterfaceImpl()); xls2csv.process(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (SQLException e) { e.printStackTrace(); } } }
第二个抽象类:
package yycg.util; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintStream; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener; import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFEventFactory; import org.apache.poi.hssf.eventusermodel.HSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFRequest; import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener; import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord; import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord; import org.apache.poi.hssf.model.HSSFFormulaParser; import org.apache.poi.hssf.record.BOFRecord; import org.apache.poi.hssf.record.BlankRecord; import org.apache.poi.hssf.record.BoolErrRecord; import org.apache.poi.hssf.record.BoundSheetRecord; import org.apache.poi.hssf.record.FormulaRecord; import org.apache.poi.hssf.record.LabelRecord; import org.apache.poi.hssf.record.LabelSSTRecord; import org.apache.poi.hssf.record.NoteRecord; import org.apache.poi.hssf.record.NumberRecord; import org.apache.poi.hssf.record.RKRecord; import org.apache.poi.hssf.record.Record; import org.apache.poi.hssf.record.SSTRecord; import org.apache.poi.hssf.record.StringRecord; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.POIFSFileSystem; public abstract class HxlsAbstract implements HSSFListener { private int minColumns; private POIFSFileSystem fs; private PrintStream output; private int lastRowNumber; private int lastColumnNumber; /** Should we output the formula, or the value it has? */ private boolean outputFormulaValues = true; /** For parsing Formulas */ private SheetRecordCollectingListener workbookBuildingListener; private HSSFWorkbook stubWorkbook; // Records we pick up as we process private SSTRecord sstRecord; private FormatTrackingHSSFListener formatListener; /** So we known which sheet we‘re on */ private int sheetIndex = -1; private BoundSheetRecord[] orderedBSRs; @SuppressWarnings("unchecked") private ArrayList boundSheetRecords = new ArrayList(); // For handling formulas with string results private int nextRow; private int nextColumn; private boolean outputNextStringRecord; private int curRow; private List<String> rowlist; @SuppressWarnings( "unused") private String sheetName; public HxlsAbstract(POIFSFileSystem fs) throws SQLException { this.fs = fs; this.output = System.out; this.minColumns = -1; this.curRow = 0; this.rowlist = new ArrayList<String>(); } public HxlsAbstract(String filename) throws IOException, FileNotFoundException, SQLException { this(new POIFSFileSystem(new FileInputStream(filename))); } //excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型 // public abstract void optRows(int curRow, List<String> rowlist) throws SQLException ; //excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型,rowlist存储了行数据 public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws Exception; /** * 遍历 excel 文件 */ public void process() throws IOException { MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener( this); formatListener = new FormatTrackingHSSFListener(listener); HSSFEventFactory factory = new HSSFEventFactory(); HSSFRequest request = new HSSFRequest(); if (outputFormulaValues) { request.addListenerForAllRecords(formatListener); } else { workbookBuildingListener = new SheetRecordCollectingListener( formatListener); request.addListenerForAllRecords(workbookBuildingListener); } factory.processWorkbookEvents(request, fs); } /** * HSSFListener 监听方法,处理 Record */ @SuppressWarnings("unchecked") public void processRecord(Record record) { int thisRow = -1; int thisColumn = -1; String thisStr = null; String value = null; switch (record.getSid()) { case BoundSheetRecord.sid: boundSheetRecords.add(record); break; case BOFRecord.sid: BOFRecord br = (BOFRecord) record; if (br.getType() == BOFRecord.TYPE_WORKSHEET) { // Create sub workbook if required if (workbookBuildingListener != null && stubWorkbook == null) { stubWorkbook = workbookBuildingListener .getStubHSSFWorkbook(); } // Works by ordering the BSRs by the location of // their BOFRecords, and then knowing that we // process BOFRecords in byte offset order sheetIndex++; if (orderedBSRs == null) { orderedBSRs = BoundSheetRecord .orderByBofPosition(boundSheetRecords); } sheetName = orderedBSRs[sheetIndex].getSheetname(); } break; case SSTRecord.sid: sstRecord = (SSTRecord) record; break; case BlankRecord.sid: BlankRecord brec = (BlankRecord) record; thisRow = brec.getRow(); thisColumn = brec.getColumn(); thisStr = ""; break; case BoolErrRecord.sid: BoolErrRecord berec = (BoolErrRecord) record; thisRow = berec.getRow(); thisColumn = berec.getColumn(); thisStr = ""; break; case FormulaRecord.sid: FormulaRecord frec = (FormulaRecord) record; thisRow = frec.getRow(); thisColumn = frec.getColumn(); if (outputFormulaValues) { if (Double.isNaN(frec.getValue())) { // Formula result is a string // This is stored in the next record outputNextStringRecord = true; nextRow = frec.getRow(); nextColumn = frec.getColumn(); } else { thisStr = formatListener.formatNumberDateCell(frec); } } else { thisStr = ‘"‘ + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + ‘"‘; } break; case StringRecord.sid: if (outputNextStringRecord) { // String for formula StringRecord srec = (StringRecord) record; thisStr = srec.getString(); thisRow = nextRow; thisColumn = nextColumn; outputNextStringRecord = false; } break; case LabelRecord.sid: LabelRecord lrec = (LabelRecord) record; curRow = thisRow = lrec.getRow(); thisColumn = lrec.getColumn(); value = lrec.getValue().trim(); value = value.equals("")?" ":value; this.rowlist.add(thisColumn, value); break; case LabelSSTRecord.sid: LabelSSTRecord lsrec = (LabelSSTRecord) record; curRow = thisRow = lsrec.getRow(); thisColumn = lsrec.getColumn(); if (sstRecord == null) { rowlist.add(thisColumn, " "); } else { value = sstRecord .getString(lsrec.getSSTIndex()).toString().trim(); value = value.equals("")?" ":value; rowlist.add(thisColumn,value); } break; case NoteRecord.sid: NoteRecord nrec = (NoteRecord) record; thisRow = nrec.getRow(); thisColumn = nrec.getColumn(); // TODO: Find object to match nrec.getShapeId() thisStr = ‘"‘ + "(TODO)" + ‘"‘; break; case NumberRecord.sid: NumberRecord numrec = (NumberRecord) record; curRow = thisRow = numrec.getRow(); thisColumn = numrec.getColumn(); value = formatListener.formatNumberDateCell(numrec).trim(); value = value.equals("")?" ":value; // Format rowlist.add(thisColumn, value); break; case RKRecord.sid: RKRecord rkrec = (RKRecord) record; thisRow = rkrec.getRow(); thisColumn = rkrec.getColumn(); thisStr = ‘"‘ + "(TODO)" + ‘"‘; break; default: break; } // 遇到新行的操作 if (thisRow != -1 && thisRow != lastRowNumber) { lastColumnNumber = -1; } // 空值的操作 if (record instanceof MissingCellDummyRecord) { MissingCellDummyRecord mc = (MissingCellDummyRecord) record; curRow = thisRow = mc.getRow(); thisColumn = mc.getColumn(); rowlist.add(thisColumn," "); } // 如果遇到能打印的东西,在这里打印 if (thisStr != null) { if (thisColumn > 0) { output.print(‘,‘); } output.print(thisStr); } // 更新行和列的值 if (thisRow > -1) lastRowNumber = thisRow; if (thisColumn > -1) lastColumnNumber = thisColumn; // 行结束时的操作 if (record instanceof LastCellOfRowDummyRecord) { if (minColumns > 0) { // 列值重新置空 if (lastColumnNumber == -1) { lastColumnNumber = 0; } } // 行结束时, 调用 optRows() 方法 lastColumnNumber = -1; try { optRows(sheetIndex,curRow, rowlist); } catch (Exception e) { e.printStackTrace(); } rowlist.clear(); } } }
一个接口:把excel中数据的导出处理,就要实现这个接口
package yycg.util; import java.util.List; public interface HxlsOptRowsInterface { public static final String SUCCESS="success"; /** * 处理excel文件每行数据方法 * @param sheetIndex 为sheet的序号 * @param curRow 为行号 * @param rowlist 行数据 * @return success:成功,否则为失败原因 * @throws Exception */ public String optRows(int sheetIndex,int curRow, List<String> rowlist) throws Exception; }
总结结构如下:
我们直接看HxlsRead里面的main函数里面是怎么用的(主要看注释):
public static void main(String[] args){ HxlsRead xls2csv; try { //第一个参数就是导入的文件 //第二个参数就是导入文件中哪个sheet //第三个参数导入接口的实现类对象 /** * 这里的HxlsOptRowsInterfaceImpl()是HxlsOptRowsInterface接口的实现类。主要是 处理excel文件每行数据方法 * 就是说我们后续要把每一行数据存入到数据库的话,只要实现这个接口,然后在实现方法里面 * 实现吧数据插入到数据库就可以了。这是一个接口,当我想做别的应用的话,就可以实现这个接口,做相应的功能。 * 其他的任何结构都不用修改 * */ xls2csv = new HxlsRead("d:/test11.xls",0,new HxlsOptRowsInterfaceImpl()); /* * 我们的xls2csv就是HxlsRead的类型,而HxlsRead继承了HxlsAbstract这个抽象类 * process是HxlsAbstract这个抽象类中的具体方法。这里的xls2csv.process();实际上调用的是父类 * 的process方法,我们看下父类的process方法,看到里面要调用optRows(sheetIndex,curRow, rowlist);这个抽象方法 * 在子类(本类中)已经实现了这个方法,由于多态的原因,调用的就是子类的这个方法。 * */ xls2csv.process(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (SQLException e) { e.printStackTrace(); } }
这样这个类的封装就好了。
025医疗项目-模块二:药品目录的导入导出-HSSF导入类的封装
标签:getc add puts column min out form model pre
原文地址:http://www.cnblogs.com/shenxiaoquan/p/6106547.html