码迷,mamicode.com
首页 > 编程语言 > 详细

[JavaWeb基础] 025.JAVA把word转换成html

时间:2015-10-19 22:36:31      阅读:301      评论:0      收藏:0      [点我收藏+]

标签:

使用第三方库 Apache POI 将 Word 文档转换成 HTML,下面直接上代码。

package com.babybus.sdteam.wordtopdf;

import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.jsoup.Jsoup;

import org.w3c.dom.Document;

public class WordToHtml {

	/**
	 * Charset name used consistently for serializing the HTML DOM, decoding
	 * the serialized bytes back into a String, and writing the output file.
	 */
	private static final String OUTPUT_ENCODING = "GB2312";

	/** Directory into which pictures embedded in the document are extracted. */
	private static final String PICTURE_DIR = "D:/test/";

	/**
	 * Converts a Word document to an HTML file.
	 * <p>
	 * The document is loaded with {@link HWPFDocument}, converted to an HTML
	 * DOM with {@link WordToHtmlConverter}, serialized with a JAXP
	 * {@link Transformer} and finally written via
	 * {@link #writeFile(String, String)}. Every picture of the document is
	 * written to {@code D:/test/} under its suggested file name.
	 *
	 * @param path path of the Word document to convert
	 * @return the path of the generated HTML file (always
	 *         {@code D://test//1.html})
	 * @throws FileNotFoundException if the Word document cannot be opened
	 * @throws IOException if reading the document or writing a picture fails,
	 *         or if the output encoding is not supported by the JVM
	 * @throws ParserConfigurationException if no DOM builder can be created
	 * @throws TransformerException if serializing the HTML DOM fails
	 */
	public static String convertWordToHtml(String path)
			throws FileNotFoundException, IOException,
			ParserConfigurationException, TransformerException {
		// Fixed location of the conversion result.
		String htmlPath = "D://test//1.html";

		// NOTE(review): only HWPFDocument is used here. The original author
		// left a hint about supporting Word 2007+ files, but nothing in this
		// class implements it - confirm whether that is required.
		HWPFDocument wordDocument = loadDocument(path);

		// The converter builds the HTML into a fresh, empty DOM document.
		WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
				DocumentBuilderFactory.newInstance().newDocumentBuilder()
						.newDocument());

		// The pictures manager only decides which link is put into the HTML;
		// the picture bytes themselves are written by extractPicture below.
		// NOTE(review): links are "test/<name>" while pictures are written to
		// "D:/test/<name>" and the HTML lives in "D:/test" - verify that the
		// links resolve from wherever the HTML is served.
		wordToHtmlConverter.setPicturesManager(new PicturesManager() {
			public String savePicture(byte[] content, PictureType pictureType,
					String suggestedName, float widthInches, float heightInches) {
				return "test/" + suggestedName;
			}
		});

		wordToHtmlConverter.processDocument(wordDocument);

		// Extract all pictures of the document to disk.
		List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
		if (pics != null) {
			for (Picture pic : pics) {
				extractPicture(pic);
			}
		}

		// Serialize the generated DOM into an in-memory buffer.
		Document htmlDocument = wordToHtmlConverter.getDocument();
		ByteArrayOutputStream out = new ByteArrayOutputStream();
		Transformer serializer = TransformerFactory.newInstance()
				.newTransformer();
		serializer.setOutputProperty(OutputKeys.ENCODING, OUTPUT_ENCODING);
		serializer.setOutputProperty(OutputKeys.INDENT, "yes");
		serializer.setOutputProperty(OutputKeys.METHOD, "HTML");
		serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

		// Decode with the same charset the serializer encoded with. Decoding
		// with the platform default charset corrupts every non-ASCII
		// character whenever that default differs from OUTPUT_ENCODING.
		writeFile(out.toString(OUTPUT_ENCODING), htmlPath);

		return htmlPath;
	}

	/**
	 * Opens the Word document at the given path and always releases the file
	 * handle, whether or not loading succeeds.
	 */
	private static HWPFDocument loadDocument(String path) throws IOException {
		FileInputStream in = new FileInputStream(path);
		try {
			return new HWPFDocument(in);
		} finally {
			// Closed explicitly instead of relying on the library to do it;
			// closing an already closed FileInputStream has no effect.
			in.close();
		}
	}

	/**
	 * Writes one picture to the picture directory under its suggested name.
	 * A picture file that cannot be created is reported and skipped so the
	 * conversion continues; a failure while writing the content propagates.
	 */
	private static void extractPicture(Picture pic) throws IOException {
		FileOutputStream pictureOut;
		try {
			pictureOut = new FileOutputStream(PICTURE_DIR
					+ pic.suggestFullFileName());
		} catch (FileNotFoundException e) {
			e.printStackTrace();
			return;
		}
		try {
			pic.writeImageContent(pictureOut);
		} finally {
			pictureOut.close();
		}
	}

	/**
	 * Normalizes the given HTML with jsoup and writes it to a file using the
	 * GB2312 encoding.
	 * <p>
	 * This method is best effort: I/O failures are printed to standard error
	 * and are not propagated to the caller.
	 *
	 * @param content HTML markup to write
	 * @param path path of the file to create or overwrite
	 */
	public static void writeFile(String content, String path) {
		// Round-trip through jsoup so the written markup is what jsoup
		// renders for the parsed document.
		String html = Jsoup.parse(content).html();

		FileOutputStream fos = null;
		BufferedWriter writer = null;
		try {
			fos = new FileOutputStream(new File(path));
			writer = new BufferedWriter(new OutputStreamWriter(fos,
					OUTPUT_ENCODING));
			writer.write(html);
		} catch (IOException ioe) {
			// Also covers FileNotFoundException, which is an IOException.
			ioe.printStackTrace();
		} finally {
			// Closing the writer flushes the buffered text, so a failure here
			// can mean lost output and must not be swallowed silently.
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException closeFailure) {
					closeFailure.printStackTrace();
				}
			}
			// Closed separately so the file handle is released even when the
			// writer was never created or its close failed part-way.
			if (fos != null) {
				try {
					fos.close();
				} catch (IOException closeFailure) {
					closeFailure.printStackTrace();
				}
			}
		}
	}
}

 编码必须使用 GB2312,使用 UTF-8 会出现乱码问题。

 

 

 技术分享

本站文章为宝宝巴士 SD.Team 原创,转载务必在明显处注明(作者官方网站:宝宝巴士)。

转载自【宝宝巴士SuperDo团队】 原文链接: http://www.cnblogs.com/superdo/p/4893022.html

[JavaWeb基础] 025.JAVA把word转换成html

标签:

原文地址:http://www.cnblogs.com/superdo/p/4893022.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!