标签:
使用第三方库 Apache POI(配合 jsoup)把 Word 文档(.doc 格式)转换成 HTML,下面直接上代码:
package com.babybus.sdteam.wordtopdf; import java.io.BufferedWriter; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.util.List; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.converter.PicturesManager; import org.apache.poi.hwpf.converter.WordToHtmlConverter; import org.apache.poi.hwpf.usermodel.PictureType; import org.jsoup.Jsoup; import org.w3c.dom.Document; public class WordToHtml { /** * 转换word到html * * @param path * @return * @throws IOException * @throws FileNotFoundException * @throws ParserConfigurationException * @throws TransformerException */ public static String convertWordToHtml(String path) throws FileNotFoundException, IOException, ParserConfigurationException, TransformerException { // 转换的结果路径 String htmlPath = "D://test//1.html"; // 创建word文档 HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(path)); // 兼容2007 以上版本 // XSSFWorkbook xssfwork=new XSSFWorkbook(new FileInputStream(fileName)); // 创建一个转换器 WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter( DocumentBuilderFactory.newInstance().newDocumentBuilder() .newDocument()); // 设置图片管理器 wordToHtmlConverter.setPicturesManager(new PicturesManager() { public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) { return "test/" + 
suggestedName; } }); // 处理word文档 wordToHtmlConverter.processDocument(wordDocument); // 保存图片集合 List pics = wordDocument.getPicturesTable().getAllPictures(); if (pics != null) { for (int i = 0; i < pics.size(); i++) { Picture pic = (Picture) pics.get(i); try { pic.writeImageContent(new FileOutputStream("D:/test/" + pic.suggestFullFileName())); } catch (FileNotFoundException e) { e.printStackTrace(); } } } // 取出转换的文档 Document htmlDocument = wordToHtmlConverter.getDocument(); // 创建输出流 和创建DOM源 ByteArrayOutputStream out = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(out); // 转换工厂 TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "GB2312"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "HTML"); serializer.transform(domSource, streamResult); out.close(); // 写入文件 writeFile(new String(out.toByteArray()), htmlPath); return htmlPath; } /** * 写入文件 * * @param content * @param path */ public static void writeFile(String content, String path) { FileOutputStream fos = null; BufferedWriter bw = null; org.jsoup.nodes.Document doc = Jsoup.parse(content); content = doc.html(); try { File file = new File(path); fos = new FileOutputStream(file); bw = new BufferedWriter(new OutputStreamWriter(fos, "GB2312")); bw.write(content); } catch (FileNotFoundException fnfe) { fnfe.printStackTrace(); } catch (IOException ioe) { ioe.printStackTrace(); } finally { try { if (bw != null) bw.close(); if (fos != null) fos.close(); } catch (IOException ie) { } } } }
编码必须用 GB2312,用 UTF-8 会有乱码问题。
本站文章为 宝宝巴士 SD.Team 原创,转载务必在明显处注明:(作者官方网站: 宝宝巴士 )
转载自【宝宝巴士SuperDo团队】 原文链接: http://www.cnblogs.com/superdo/p/4893022.html
[JavaWeb基础] 025.JAVA把word转换成html
标签:
原文地址:http://www.cnblogs.com/superdo/p/4893022.html