标签:equal beans group extract try row nal div content
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
public class Wordfile {
public static void main(String[] args) throws Exception {
String path = "G:\\样题.doc";
String context = readWord(path);
System.out.println(context);
}
public static String readWord(String path) {
InputStream is = null;
String content = "";
String suffix = path.substring(path.lastIndexOf(".") + 1);
try {
if (suffix.equals("doc")) {
// word 2003: 图片不会被读取
is = new FileInputStream(new File(path));
WordExtractor ex = new WordExtractor(is);// is是WORD文件的InputStream
content = ex.getText().trim()
} else if (suffix.equals("docx")) {
// word 2007 图片不会被读取, 表格中的数据会被放在字符串的最后
OPCPackage opcPackage = POIXMLDocument.openPackage(path);
POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
content = (extractor).getText().trim();
}
} catch (Exception e) {
e.printStackTrace();
} finally {
if (is != null) {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return content;
}
}
<!-- Maven dependencies: Apache POI modules plus dom4j and xmlbeans. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.8</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.8</version>
</dependency>
<!-- Packages required for Excel 2010 -->
<dependency>
<groupId>dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>1.6.1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.8</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.8</version>
</dependency>
<dependency>
<groupId>org.apache.xmlbeans</groupId>
<artifactId>xmlbeans</artifactId>
<version>2.6.0</version>
</dependency>
标签:equal beans group extract try row nal div content
原文地址:https://www.cnblogs.com/Koaler/p/12143476.html