标签:
POI word文件转html
package com.feiruo.officeConvert;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

import org.apache.poi.hwpf.usermodel.Picture;

/**
 * Base class for document converters. It holds the output directory, the image
 * sub-directory, the converted content and the output encoding, and knows how
 * to write the converted content to disk.
 */
public abstract class OfficeConvert {

    /** Directory (relative path fragment) in which extracted images are stored. */
    private String imgPath = null;

    /** Directory in which the converted file is stored. */
    private String parentPath = null;

    /** Converted file content; {@code null} until a conversion has stored it. */
    private String fileContent = null;

    /** Character encoding used when writing the converted file. */
    private String encode = "UTF-8";

    /**
     * Converts the given document and stores the result in this instance.
     *
     * @param docPath path of the {@code *.doc} document to convert
     * @throws FileNotFoundException        if the document cannot be found
     * @throws IOException                  if reading the document fails
     * @throws ParserConfigurationException if no XML document builder can be created
     * @throws TransformerException         if serializing the result fails
     */
    public abstract void convert(String docPath) throws FileNotFoundException,
            IOException, ParserConfigurationException, TransformerException;

    /**
     * Writes the current file content to disk using the configured encoding.
     * I/O failures are reported on standard error and do not propagate.
     *
     * @param filepath path of the file to write
     * @throws IllegalStateException if no file content has been set yet
     */
    public void writeFile(String filepath) {
        // Fail before touching the file system: the original opened (and thereby
        // truncated) the target and only then hit a NullPointerException.
        if (fileContent == null) {
            throw new IllegalStateException(
                    "No file content to write; run a conversion or call setFileContent first");
        }

        // Keep the original side effect of creating the configured parent
        // directory, but tolerate it being unset.
        if (parentPath != null) {
            File configuredDir = new File(parentPath);
            if (!configuredDir.exists()) {
                configuredDir.mkdirs();
            }
        }

        // Also make sure the directory that actually contains the target exists.
        File target = new File(filepath);
        File targetDir = target.getParentFile();
        if (targetDir != null && !targetDir.exists()) {
            targetDir.mkdirs();
        }

        // The stream is declared separately so it is closed even when the writer
        // cannot be created (e.g. unsupported encoding).
        try (FileOutputStream fos = new FileOutputStream(target);
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, encode))) {
            writer.write(fileContent);
        } catch (IOException ioe) {
            // Covers FileNotFoundException and failures while flushing on close.
            ioe.printStackTrace();
        }
    }

    /**
     * Normalizes a directory path: surrounding whitespace, double quotes and
     * asterisks are removed, and a trailing {@code /} is appended to any
     * non-empty result that lacks one.
     *
     * @param path the raw path, may be {@code null}
     * @return the normalized path, or {@code null} if {@code path} is {@code null}
     */
    public String checkSetPath(String path) {
        if (path == null) {
            return null;
        }
        // Literal replace(): the original used replaceAll("/*", ""), a regex that
        // matches runs of slashes, so it deleted every '/' and kept the '*'.
        // The original also only cleaned up when the first offending character was
        // not at index 0, which left a fully quoted path untouched.
        String cleaned = path.trim().replace("\"", "").replace("*", "").trim();

        // NOTE(review): the original additionally replaced '<' and '>' with
        // themselves, which had no effect. That may have been an HTML-entity
        // escape lost in transcription - confirm the intended handling.

        if (!cleaned.isEmpty() && !cleaned.endsWith("/")) {
            cleaned += "/";
        }
        return cleaned;
    }

    /**
     * @return the encoding used when writing the converted file
     */
    public String getEncode() {
        return encode;
    }

    /**
     * @param encode the encoding to use when writing the converted file
     */
    public void setEncode(String encode) {
        this.encode = encode;
    }

    /**
     * Returns the image storage path.
     *
     * @return the image path as last normalized by {@link #setImgPath(String)}
     */
    public String getImgPath() {
        return imgPath;
    }

    /**
     * Sets the folder in which images are stored. The value is normalized with
     * {@link #checkSetPath(String)}.
     *
     * @param imgPath name of the image folder
     */
    public void setImgPath(String imgPath) {
        this.imgPath = checkSetPath(imgPath);
    }

    /**
     * Returns the directory in which the converted file is stored.
     *
     * @return the parent path as last normalized by {@link #setParentPath(String)}
     */
    public String getParentPath() {
        return parentPath;
    }

    /**
     * Sets the directory in which the converted file is stored. The value is
     * normalized with {@link #checkSetPath(String)}.
     *
     * @param parentPath the output directory
     */
    public void setParentPath(String parentPath) {
        this.parentPath = checkSetPath(parentPath);
    }

    /**
     * Returns the converted file content.
     *
     * @return the content, or {@code null} if none has been set
     */
    public String getFileContent() {
        return fileContent;
    }

    /**
     * @param content the converted file content to store
     */
    public void setFileContent(String content) {
        this.fileContent = content;
    }
}
package com.feiruo.officeConvert;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;

/**
 * Converts a {@code *.doc} document into an {@code *.html} file.
 *
 * @author Jdk.feiruo.
 * @since JDK 1.7 POI 3.8
 * @version 1.0
 */
public class DocToHtml extends OfficeConvert implements IOfficeConvert {

    /** Pictures found in the most recently converted document; {@code null} before any conversion. */
    private List<Picture> pics = null;

    /**
     * @param parentPath directory in which the html file is stored
     * @param imageppth  directory in which the html images are stored
     * @param encoding   encoding of the generated html
     */
    public DocToHtml(String parentPath, String imageppth, String encoding) {
        // The setters already normalize via checkSetPath, so it is not applied twice here.
        setParentPath(parentPath);
        setImgPath(imageppth);
        this.setEncode(encoding);
    }

    public DocToHtml() {
    }

    /**
     * Converts the {@code *.doc} document to html, stores the markup as this
     * instance's file content and remembers the document's pictures for
     * {@link #writeWithName(String)}.
     *
     * @param docPath location of the {@code *.doc} document
     * @throws FileNotFoundException        if the document cannot be opened
     * @throws IOException                  if reading the document or decoding the html fails
     * @throws ParserConfigurationException if no XML document builder can be created
     * @throws TransformerException         if serializing the html fails
     */
    @Override
    public void convert(String docPath) throws FileNotFoundException,
            IOException, ParserConfigurationException, TransformerException {
        // The input stream stays open for the whole conversion and is always
        // closed afterwards; the original never closed it.
        try (FileInputStream in = new FileInputStream(docPath)) {
            HWPFDocument wordDocument = new HWPFDocument(in);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType,
                        String suggestedName, float widthInches, float heightInches) {
                    // Only the name is returned here; the image bytes are written
                    // later by writeWithName.
                    return suggestedName;
                }
            });
            wordToHtmlConverter.processDocument(wordDocument);
            pics = wordDocument.getPicturesTable().getAllPictures();
            setFileContent(renderHtml(wordToHtmlConverter.getDocument()));
        }
    }

    /**
     * Serializes the DOM as html in the configured encoding and prefixes every
     * {@code <img src="} value with the image path.
     */
    private String renderHtml(Document htmlDocument) throws TransformerException, IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, this.getEncode());
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

        // Decode with the same encoding the serializer wrote; the platform default
        // charset would corrupt non-ASCII text whenever the two differ.
        String htmlContent = out.toString(this.getEncode());

        String imagePath = getImgPath();
        if (imagePath != null) {
            // Literal replace(): replaceAll would interpret '$' and '\' in the path.
            htmlContent = htmlContent.replace("<img src=\"", "<img src=\"" + imagePath);
        }
        return htmlContent;
    }

    /**
     * Writes the pictures of the converted document into the image directory and
     * then writes the html to {@code <parentPath><fileName>.html}. A picture
     * that cannot be written is reported on standard error and skipped.
     *
     * @param fileName name of the html file, without extension
     */
    @Override
    public void writeWithName(String fileName) {
        // Save the document's pictures first.
        if (pics != null) {
            File imageDir = new File(this.getParentPath() + this.getImgPath());
            // Create the folder if it does not exist yet.
            if (!imageDir.exists()) {
                imageDir.mkdirs();
            }
            for (Picture pic : pics) {
                File imageFile = new File(imageDir, pic.suggestFullFileName());
                // Close each stream after use; the original leaked one per picture.
                try (FileOutputStream imageOut = new FileOutputStream(imageFile)) {
                    pic.writeImageContent(imageOut);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        // Save the html source file.
        this.writeFile(getParentPath() + fileName + ".html");
    }
}
package com.feiruo.Test;

import java.io.FileNotFoundException;
import java.io.IOException;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

// Fixed: DocToHtml is declared in com.feiruo.officeConvert, not com.yinhai.officeConvert.
import com.feiruo.officeConvert.DocToHtml;

/**
 * Manual smoke test: converts {@code D://test//test.doc} and writes the result
 * as {@code feiruo.html} under {@code C://test}.
 */
public class Test {

    public static void main(String[] args) {
        new Test();
    }

    /**
     * Runs the sample conversion. The html is written only when the conversion
     * succeeded; a failure is reported on standard error.
     */
    public Test() {
        DocToHtml dth = new DocToHtml("C://test", "f", "UTF-8");
        try {
            dth.convert("D://test//test.doc");
        } catch (IOException | ParserConfigurationException | TransformerException e) {
            // IOException also covers FileNotFoundException.
            e.printStackTrace();
            // Nothing was converted, so there is nothing to write.
            return;
        }
        dth.writeWithName("feiruo");
    }
}
package com.feiruo.officeConvert;

/**
 * Declares the write-to-disk operation offered by a converter.
 */
public interface IOfficeConvert {

    /**
     * Writes the file to disk.
     *
     * @param fileName name of the file to write
     */
    void writeWithName(String fileName);
}
标签:
原文地址:http://www.cnblogs.com/feiruo/p/5924514.html