使用 POI 读取 Word 2003(.doc 文档)中的表格
Jakarta POI 是 Apache 的子项目,目标是处理 OLE2 对象,它提供了一组操作 Windows 文档的 Java API。网上有很多介绍用 POI 读写 Excel 的文章,POI 读写 Excel 很方便,可以与 jxl 相媲美。本文主要介绍如何用 POI 读取 Word 文档中的表格数据。
具体代码如下:
1 import java.io.File; 2 import java.io.FileInputStream; 3 import java.io.FileNotFoundException; 4 5 import org.apache.poi.hwpf.HWPFDocument; 6 import org.apache.poi.hwpf.usermodel.Paragraph; 7 import org.apache.poi.hwpf.usermodel.Range; 8 import org.apache.poi.hwpf.usermodel.Table; 9 import org.apache.poi.hwpf.usermodel.TableCell; 10 import org.apache.poi.hwpf.usermodel.TableIterator; 11 import org.apache.poi.hwpf.usermodel.TableRow; 12 13 import java.io.File; 14 import java.io.FileInputStream; 15 import java.io.InputStream; 16 17 import org.apache.poi.POIXMLDocument; 18 import org.apache.poi.POIXMLTextExtractor; 19 import org.apache.poi.hwpf.extractor.WordExtractor; 20 import org.apache.poi.openxml4j.opc.OPCPackage; 21 import org.apache.poi.xwpf.extractor.XWPFWordExtractor; 22 23 24 import org.apache.poi.poifs.filesystem.POIFSFileSystem; 25 26 public class ExportDocImpl 27 { 28 public void testWord(){ 29 try{ 30 FileInputStream in = new FileInputStream("D:\\sinye.doc");//载入文档 31 POIFSFileSystem pfs = new POIFSFileSystem(in); 32 HWPFDocument hwpf = new HWPFDocument(pfs); 33 Range range = hwpf.getRange();//得到文档的读取范围 34 TableIterator it = new TableIterator(range); 35 //迭代文档中的表格 36 while (it.hasNext()) { 37 Table tb = (Table) it.next(); 38 //迭代行,默认从0开始 39 for (int i = 0; i < tb.numRows(); i++) { 40 TableRow tr = tb.getRow(i); 41 //迭代列,默认从0开始 42 for (int j = 0; j < tr.numCells(); j++) { 43 TableCell td = tr.getCell(j);//取得单元格 44 //取得单元格的内容 45 for(int k=0;k