【转】 大数据量的excel文件读取——excel2007(含代码及示例)

    技术2024-11-13  23

    大数据量的excel文件读取——excel2007(含代码及示例)

    转自:http://www.javaeye.com/topic/624969

     

    excel2007文件格式与之前版本不同,之前版本采用的是微软自己的存储格式。07版内容的存储采用XML格式,所以,理所当然的,对大数据量的 xlsx文件的读取采用的也是XML的处理方式SAX。

        同之前的版本一样,大数据量文件的读取采用的是事件模型eventusermodel。usermodel模式需要将文件一次性全部读到内存中,07版的既然采用的存储模式是xml,解析用的DOM方式也是如此,这种模式操作简单,容易上手,但是对于大量数据占用的内存也是相当可观,在Eclipse中经常出现内存溢出。

        下面就是采用eventusermodel对07excel文件读取。

        同上篇,我将当前行的单元格数据存储到List中,抽象出 optRows 方法,该方法会在每行末尾时调用,方法参数为当前行索引curRow(int型)及存有行内单元格数据的List。继承类只需实现该行级方法即可。

     

        经测试,对12万条数据,7M大小的文件也能正常运行。无需设置vm的内存空间。

     

        excel读取采用的API为POI3.6,使用前先下载此包,若运行中出现其他依赖包不存在,请下载相应依赖包。

     

    抽象类:XxlsAbstract ,作用:遍历excel文件,提供行级操作方法 optRows

    Java代码 package com.gaosheng.util.xls;     import java.io.InputStream;   import java.sql.SQLException;   import java.util.ArrayList;   import java.util.Iterator;   import java.util.List;     import org.apache.poi.xssf.eventusermodel.XSSFReader;   import org.apache.poi.xssf.model.SharedStringsTable;   import org.apache.poi.xssf.usermodel.XSSFRichTextString;   import org.apache.poi.openxml4j.opc.OPCPackage;   import org.xml.sax.Attributes;   import org.xml.sax.InputSource;   import org.xml.sax.SAXException;   import org.xml.sax.XMLReader;   import org.xml.sax.helpers.DefaultHandler;   import org.xml.sax.helpers.XMLReaderFactory;     /**   * XSSF and SAX (Event API)   */  public abstract class XxlsAbstract extends DefaultHandler {       private SharedStringsTable sst;       private String lastContents;       private boolean nextIsString;         private int sheetIndex = -1;       private List<String> rowlist = new ArrayList<String>();       private int curRow = 0;       private int curCol = 0;         //excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型   //  public abstract void optRows(int curRow, List<String> rowlist) throws SQLException ;              //excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型       public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException;              //只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3       public void processOneSheet(String filename,int sheetId) throws Exception {           OPCPackage pkg = OPCPackage.open(filename);           XSSFReader r = new XSSFReader(pkg);           SharedStringsTable sst = r.getSharedStringsTable();                      XMLReader parser = fetchSheetParser(sst);             // rId2 found by processing the Workbook           // 根据 rId# 或 rSheet# 查找sheet           InputStream sheet2 = r.getSheet("rId"+sheetId);           sheetIndex++;           InputSource sheetSource = new InputSource(sheet2);           parser.parse(sheetSource);           sheet2.close();       }         /**       * 遍历 excel 文件       */      public void process(String filename) throws Exception {           OPCPackage pkg = OPCPackage.open(filename);           XSSFReader r = new XSSFReader(pkg);           SharedStringsTable sst = r.getSharedStringsTable();             XMLReader parser = fetchSheetParser(sst);             Iterator<InputStream> sheets = r.getSheetsData();           while (sheets.hasNext()) {               curRow = 0;               sheetIndex++;               InputStream sheet = sheets.next();               InputSource sheetSource = new InputSource(sheet);               parser.parse(sheetSource);               sheet.close();           }       }         public XMLReader fetchSheetParser(SharedStringsTable sst)               throws SAXException {           XMLReader parser = XMLReaderFactory                   .createXMLReader("org.apache.xerces.parsers.SAXParser");           this.sst = sst;           parser.setContentHandler(this);           return parser;       }         public void startElement(String uri, String localName, String name,               Attributes attributes) throws SAXException {           // c => 单元格           if (name.equals("c")) {               // 如果下一个元素是 SST 的索引,则将nextIsString标记为true               String cellType = attributes.getValue("t");               if (cellType != null && cellType.equals("s")) {                   nextIsString = true;               } else {                   nextIsString = false;               }           }           // 置空           lastContents = "";       }         public void endElement(String uri, String localName, String name)               throws SAXException {           // 根据SST的索引值的到单元格的真正要存储的字符串           // 这时characters()方法可能会被调用多次           if (nextIsString) {               try {                   int idx = Integer.parseInt(lastContents);                   lastContents = new XSSFRichTextString(sst.getEntryAt(idx))                           .toString();               } catch (Exception e) {                 }           }             // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引           // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符           if (name.equals("v")) {               String value = lastContents.trim();               value = value.equals("")?" ":value;               rowlist.add(curCol, value);               curCol++;           }else {               //如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法               if (name.equals("row")) {                   try {                       optRows(sheetIndex,curRow,rowlist);                   } catch (SQLException e) {                       e.printStackTrace();                   }                   rowlist.clear();                   curRow++;                   curCol = 0;               }           }       }         public void characters(char[] ch, int start, int length)               throws SAXException {           //得到单元格内容的值           lastContents += new String(ch, start, length);       }   }   package com.gaosheng.util.xls; import java.io.InputStream; import java.sql.SQLException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.apache.poi.openxml4j.opc.OPCPackage; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; /** * XSSF and SAX (Event API) */ public abstract class XxlsAbstract extends DefaultHandler { private SharedStringsTable sst; private String lastContents; private boolean nextIsString; private int sheetIndex = -1; private List<String> rowlist = new ArrayList<String>(); private int curRow = 0; private int curCol = 0; //excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型 // public abstract void optRows(int curRow, List<String> rowlist) throws SQLException ; //excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型 public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException; //只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3 public void processOneSheet(String filename,int sheetId) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); // rId2 found by processing the Workbook // 根据 rId# 或 rSheet# 查找sheet InputStream sheet2 = r.getSheet("rId"+sheetId); sheetIndex++; InputSource sheetSource = new InputSource(sheet2); parser.parse(sheetSource); sheet2.close(); } /** * 遍历 excel 文件 */ public void process(String filename) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); Iterator<InputStream> sheets = r.getSheetsData(); while (sheets.hasNext()) { curRow = 0; sheetIndex++; InputStream sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); } } public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException { XMLReader parser = XMLReaderFactory .createXMLReader("org.apache.xerces.parsers.SAXParser"); this.sst = sst; parser.setContentHandler(this); return parser; } public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { // c => 单元格 if (name.equals("c")) { // 如果下一个元素是 SST 的索引,则将nextIsString标记为true String cellType = attributes.getValue("t"); if (cellType != null && cellType.equals("s")) { nextIsString = true; } else { nextIsString = false; } } // 置空 lastContents = ""; } public void endElement(String uri, String localName, String name) throws SAXException { // 根据SST的索引值的到单元格的真正要存储的字符串 // 这时characters()方法可能会被调用多次 if (nextIsString) { try { int idx = Integer.parseInt(lastContents); lastContents = new XSSFRichTextString(sst.getEntryAt(idx)) .toString(); } catch (Exception e) { } } // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引 // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符 if (name.equals("v")) { String value = lastContents.trim(); value = value.equals("")?" ":value; rowlist.add(curCol, value); curCol++; }else { //如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法 if (name.equals("row")) { try { optRows(sheetIndex,curRow,rowlist); } catch (SQLException e) { e.printStackTrace(); } rowlist.clear(); curRow++; curCol = 0; } } } public void characters(char[] ch, int start, int length) throws SAXException { //得到单元格内容的值 lastContents += new String(ch, start, length); } }

     

    继承类:XxlsBig,作用:将数据转出到数据库临时表

    Java代码 package com.gaosheng.util.examples.xls;     import java.io.FileInputStream;   import java.io.IOException;   import java.sql.Connection;   import java.sql.DriverManager;   import java.sql.PreparedStatement;   import java.sql.SQLException;   import java.sql.Statement;   import java.util.List;   import java.util.Properties;     import com.gaosheng.util.xls.XxlsAbstract;     public class XxlsBig extends XxlsAbstract {       public static void main(String[] args) throws Exception {           XxlsBig howto = new XxlsBig("temp_table");           howto.processOneSheet("F:/new.xlsx",1);           howto.process("F:/new.xlsx");           howto.close();       }              public XxlsBig(String tableName) throws SQLException{           this.conn = getNew_Conn();           this.statement = conn.createStatement();           this.tableName = tableName;       }         private Connection conn = null;       private Statement statement = null;       private PreparedStatement newStatement = null;         private String tableName = "temp_table";       private boolean create = true;              public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {           if (sheetIndex == 0 && curRow == 0) {               StringBuffer preSql = new StringBuffer("insert into " + tableName                       + " values(");               StringBuffer table = new StringBuffer("create table " + tableName                       + "(");               int c = rowlist.size();               for (int i = 0; i < c; i++) {                   preSql.append("?,");                   table.append(rowlist.get(i));                   table.append("  varchar2(100) ,");               }                 table.deleteCharAt(table.length() - 1);               preSql.deleteCharAt(preSql.length() - 1);               table.append(")");               preSql.append(")");               if (create) {                   statement = conn.createStatement();                   try{                       statement.execute("drop table "+tableName);                   }catch(Exception e){                                          }finally{                       System.out.println("表 "+tableName+" 删除成功");                   }                   if (!statement.execute(table.toString())) {                       System.out.println("创建表 "+tableName+" 成功");                       // return;                   } else {                       System.out.println("创建表 "+tableName+" 失败");                       return;                   }               }               conn.setAutoCommit(false);               newStatement = conn.prepareStatement(preSql.toString());             } else if(curRow>0) {               // 一般行               int col = rowlist.size();               for (int i = 0; i < col; i++) {                   newStatement.setString(i + 1, rowlist.get(i).toString());               }               newStatement.addBatch();               if (curRow % 1000 == 0) {                   newStatement.executeBatch();                   conn.commit();               }           }       }              private static Connection getNew_Conn() {           Connection conn = null;           Properties props = new Properties();           FileInputStream fis = null;             try {               fis = new FileInputStream("D:/database.properties");               props.load(fis);               DriverManager.registerDriver(new oracle.jdbc.driver.OracleDriver());               // String jdbcURLString =               // "jdbc:oracle:thin:@192.168.0.28:1521:orcl";               StringBuffer jdbcURLString = new StringBuffer();               jdbcURLString.append("jdbc:oracle:thin:@");               jdbcURLString.append(props.getProperty("host"));               jdbcURLString.append(":");               jdbcURLString.append(props.getProperty("port"));               jdbcURLString.append(":");               jdbcURLString.append(props.getProperty("database"));               conn = DriverManager.getConnection(jdbcURLString.toString(), props                       .getProperty("user"), props.getProperty("password"));           } catch (Exception e) {               e.printStackTrace();           } finally {               try {                   fis.close();               } catch (IOException e) {                   e.printStackTrace();               }           }           return conn;       }              public int close() {           try {               newStatement.executeBatch();               conn.commit();               System.out.println("数据写入完毕");               this.newStatement.close();               this.statement.close();               this.conn.close();               return 1;           } catch (SQLException e) {               return 0;           }       }   }   package com.gaosheng.util.examples.xls; import java.io.FileInputStream; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Statement; import java.util.List; import java.util.Properties; import com.gaosheng.util.xls.XxlsAbstract; public class XxlsBig extends XxlsAbstract { public static void main(String[] args) throws Exception { XxlsBig howto = new XxlsBig("temp_table"); howto.processOneSheet("F:/new.xlsx",1); howto.process("F:/new.xlsx"); howto.close(); } public XxlsBig(String tableName) throws SQLException{ this.conn = getNew_Conn(); this.statement = conn.createStatement(); this.tableName = tableName; } private Connection conn = null; private Statement statement = null; private PreparedStatement newStatement = null; private String tableName = "temp_table"; private boolean create = true; public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException { if (sheetIndex == 0 && curRow == 0) { StringBuffer preSql = new StringBuffer("insert into " + tableName + " values("); StringBuffer table = new StringBuffer("create table " + tableName + "("); int c = rowlist.size(); for (int i = 0; i < c; i++) { preSql.append("?,"); table.append(rowlist.get(i)); table.append(" varchar2(100) ,"); } table.deleteCharAt(table.length() - 1); preSql.deleteCharAt(preSql.length() - 1); table.append(")"); preSql.append(")"); if (create) { statement = conn.createStatement(); try{ statement.execute("drop table "+tableName); }catch(Exception e){ }finally{ System.out.println("表 "+tableName+" 删除成功"); } if (!statement.execute(table.toString())) { System.out.println("创建表 "+tableName+" 成功"); // return; } else { System.out.println("创建表 "+tableName+" 失败"); return; } } conn.setAutoCommit(false); newStatement = conn.prepareStatement(preSql.toString()); } else if(curRow>0) { // 一般行 int col = rowlist.size(); for (int i = 0; i < col; i++) { newStatement.setString(i + 1, rowlist.get(i).toString()); } newStatement.addBatch(); if (curRow % 1000 == 0) { newStatement.executeBatch(); conn.commit(); } } } private static Connection getNew_Conn() { Connection conn = null; Properties props = new Properties(); FileInputStream fis = null; try { fis = new FileInputStream("D:/database.properties"); props.load(fis); DriverManager.registerDriver(new oracle.jdbc.driver.OracleDriver()); // String jdbcURLString = // "jdbc:oracle:thin:@192.168.0.28:1521:orcl"; StringBuffer jdbcURLString = new StringBuffer(); jdbcURLString.append("jdbc:oracle:thin:@"); jdbcURLString.append(props.getProperty("host")); jdbcURLString.append(":"); jdbcURLString.append(props.getProperty("port")); jdbcURLString.append(":"); jdbcURLString.append(props.getProperty("database")); conn = DriverManager.getConnection(jdbcURLString.toString(), props .getProperty("user"), props.getProperty("password")); } catch (Exception e) { e.printStackTrace(); } finally { try { fis.close(); } catch (IOException e) { e.printStackTrace(); } } return conn; } public int close() { try { newStatement.executeBatch(); conn.commit(); System.out.println("数据写入完毕"); this.newStatement.close(); this.statement.close(); this.conn.close(); return 1; } catch (SQLException e) { return 0; } } }

     继承类:XxlsPrint,作用:将数据输出到控制台

    Java代码 package com.gaosheng.util.examples.xls;     import java.sql.SQLException;   import java.util.List;     import com.gaosheng.util.xls.XxlsAbstract;     public class XxlsPrint extends XxlsAbstract {         @Override      public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {           for (int i = 0; i < rowlist.size(); i++) {               System.out.print("'" + rowlist.get(i) + "',");           }           System.out.println();       }         public static void main(String[] args) throws Exception {           XxlsPrint howto = new XxlsPrint();           howto.processOneSheet("F:/new.xlsx",1);   //      howto.processAllSheets("F:/new.xlsx");       }   }   package com.gaosheng.util.examples.xls; import java.sql.SQLException; import java.util.List; import com.gaosheng.util.xls.XxlsAbstract; public class XxlsPrint extends XxlsAbstract { @Override public void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException { for (int i = 0; i < rowlist.size(); i++) { System.out.print("'" + rowlist.get(i) + "',"); } System.out.println(); } public static void main(String[] args) throws Exception { XxlsPrint howto = new XxlsPrint(); howto.processOneSheet("F:/new.xlsx",1); // howto.processAllSheets("F:/new.xlsx"); } }

     源代码在附件中,还包含了说明文件、数据库配置文件、以及整合xls文件和xlsx文件读取的类:Xls2Do。

    最新回复(0)