[JavaWeb基础] 025.JAVA把word转换成html

时间：2015-10-19 22:36:31 阅读：301 评论：0 收藏：0 [点我收藏+]

标签：

使用第三方库 Apache POI 把 Word 文档（.doc 格式）转换成 HTML，下面直接上代码。

package com.babybus.sdteam.wordtopdf;

import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.jsoup.Jsoup;

import org.w3c.dom.Document;

/**
 * Converts a binary Word document (.doc) to an HTML file using Apache POI's
 * HWPF converter, and extracts the pictures embedded in the document.
 *
 * <p>The output locations are fixed: the HTML is written to
 * {@code D://test//1.html} and the pictures to {@code D:/test/}.
 */
public class WordToHtml {

	/**
	 * Charset used consistently for serializing the HTML DOM, decoding the
	 * serialized bytes back into text, and writing the final file.
	 */
	private static final String ENCODING = "GB2312";

	/** Location of the generated HTML file. */
	private static final String HTML_PATH = "D://test//1.html";

	/** Directory into which embedded pictures are extracted. */
	private static final String PICTURE_DIR = "D:/test/";

	/**
	 * Converts the Word document at {@code path} to HTML.
	 *
	 * @param path location of the source .doc file
	 * @return the path of the generated HTML file
	 * @throws FileNotFoundException if the source document cannot be opened
	 * @throws IOException if reading the document or writing a picture fails
	 * @throws ParserConfigurationException if no DOM builder can be created
	 * @throws TransformerException if the HTML DOM cannot be serialized
	 */
	public static String convertWordToHtml(String path)
			throws FileNotFoundException, IOException,
			ParserConfigurationException, TransformerException {
		// HWPFDocument only understands the binary .doc format; .docx files
		// need POI's XWPF classes and a different conversion route.
		HWPFDocument wordDocument;
		FileInputStream wordIn = new FileInputStream(path);
		try {
			wordDocument = new HWPFDocument(wordIn);
		} finally {
			// The document is fully loaded by the constructor, so the
			// stream can be released right away.
			wordIn.close();
		}

		// The converter builds its result into an empty DOM document.
		WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
				DocumentBuilderFactory.newInstance().newDocumentBuilder()
						.newDocument());

		// Tells the converter which URL to emit for each embedded picture.
		// NOTE(review): this is relative to wherever the HTML is served from;
		// the pictures themselves are written to D:/test/ below - confirm the
		// two locations line up in your deployment.
		wordToHtmlConverter.setPicturesManager(new PicturesManager() {
			public String savePicture(byte[] content, PictureType pictureType,
					String suggestedName, float widthInches, float heightInches) {
				return "test/" + suggestedName;
			}
		});

		wordToHtmlConverter.processDocument(wordDocument);

		// Extract every embedded picture next to the HTML output.
		List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
		if (pics != null) {
			for (Picture pic : pics) {
				FileOutputStream picOut = null;
				try {
					picOut = new FileOutputStream(PICTURE_DIR
							+ pic.suggestFullFileName());
					pic.writeImageContent(picOut);
				} catch (FileNotFoundException e) {
					// Best effort: a picture that cannot be created is
					// reported and skipped; the conversion continues.
					e.printStackTrace();
				} finally {
					if (picOut != null) {
						picOut.close();
					}
				}
			}
		}

		Document htmlDocument = wordToHtmlConverter.getDocument();

		// Serialize the DOM into memory as HTML.
		ByteArrayOutputStream out = new ByteArrayOutputStream();
		Transformer serializer = TransformerFactory.newInstance().newTransformer();
		serializer.setOutputProperty(OutputKeys.ENCODING, ENCODING);
		serializer.setOutputProperty(OutputKeys.INDENT, "yes");
		serializer.setOutputProperty(OutputKeys.METHOD, "html");
		serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

		// Decode with the same charset the serializer used; relying on the
		// platform default corrupts non-ASCII text on machines whose default
		// charset is not compatible with GB2312.
		writeFile(out.toString(ENCODING), HTML_PATH);

		return HTML_PATH;
	}

	/**
	 * Normalizes {@code content} through jsoup and writes it to {@code path}
	 * using the GB2312 encoding.
	 *
	 * <p>I/O failures are reported on standard error and are not propagated
	 * to the caller.
	 *
	 * @param content HTML markup to write
	 * @param path destination file
	 */
	public static void writeFile(String content, String path) {
		String html = Jsoup.parse(content).html();
		FileOutputStream fos = null;
		BufferedWriter bw = null;
		try {
			fos = new FileOutputStream(new File(path));
			bw = new BufferedWriter(new OutputStreamWriter(fos, ENCODING));
			bw.write(html);
		} catch (IOException ioe) {
			// Also covers FileNotFoundException, which is an IOException.
			ioe.printStackTrace();
		} finally {
			try {
				if (bw != null) {
					// Closing the writer flushes it and closes fos as well.
					bw.close();
				} else if (fos != null) {
					// The writer was never created; release the raw stream.
					fos.close();
				}
			} catch (IOException ie) {
				// A failed close can mean the buffered data never reached
				// the disk, so it must not go unnoticed.
				ie.printStackTrace();
			}
		}
	}
}

编码需要在各个环节保持一致：序列化 DOM、把字节还原成字符串、写入文件都应使用同一种编码。示例统一使用 GB2312；如果只把其中一处改成 UTF-8，就会出现乱码问题。

技术分享

本站文章为 宝宝巴士 SD.Team 原创，转载务必在明显处注明：（作者官方网站：宝宝巴士 )

转载自【宝宝巴士SuperDo团队】 原文链接: http://www.cnblogs.com/superdo/p/4893022.html

[JavaWeb基础] 025.JAVA把word转换成html

标签：

原文地址：http://www.cnblogs.com/superdo/p/4893022.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行