org.apache.poi.hwpf.converter.WordToHtmlConverter Java Examples

The following examples show how to use org.apache.poi.hwpf.converter.WordToHtmlConverter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: WordUtils.java | Project: job | License: MIT License | 7 votes
/**
 * Converts a binary Word document read from {@code input} into an HTML string.
 *
 * <p>The document is rendered into a DOM by {@code WordToHtmlConverter} and then
 * serialized as indented HTML using {@code charset} as the output encoding. The
 * serialized bytes are decoded with that same charset, so the returned text matches
 * what the serializer wrote.
 *
 * <p>{@code input} is always closed by this method, whether or not the conversion
 * succeeds.
 *
 * @param input stream positioned at the start of the Word document; closed on return
 * @param charset name of the charset used both to serialize and to decode the HTML
 * @return the HTML markup produced for the document
 * @throws Exception if reading, converting or serializing the document fails, or if
 *     {@code charset} is not a supported charset name
 */
public static String parseDoc2Html(InputStream input, String charset) throws Exception {
  ByteArrayOutputStream output = new ByteArrayOutputStream();
  try {
    // Parsing happens inside the try so the stream is released even when the
    // document is malformed and the constructor throws.
    HWPFDocument wordDocument = new HWPFDocument(input);
    Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    WordToHtmlConverter converter = new WordToHtmlConverter(doc);
    converter.processDocument(wordDocument);

    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, charset);
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.METHOD, "html");
    serializer.transform(new DOMSource(converter.getDocument()), new StreamResult(output));
  } finally {
    input.close();
  }

  // Decode with the charset the serializer encoded with; relying on the platform
  // default here garbles non-ASCII text whenever the two differ.
  return new String(output.toByteArray(), charset);
}
 
Example #2
Source File: OfficeHtmlUtil.java | Project: jeewx | License: Apache License 2.0 | 5 votes
/**
 * Converts a Word file to HTML.
 * <p>
 * The file at {@code docfile} is rendered by {@code WordToHtmlConverter},
 * serialized as indented UTF-8 HTML, and the resulting text is handed to
 * {@code stringToFile(content, htmlfile)}.
 * <p>
 * Any exception raised along the way is caught and its stack trace is
 * printed; nothing is propagated to the caller.
 *
 * @param docfile
 *            full path of the Word file
 * @param htmlfile
 *            path where the converted HTML is stored
 * added by duanql	2013-07-17
 */
public void WordConverterHtml(String docfile, String htmlfile){
	try {
		String content;
		InputStream input = new FileInputStream(docfile);
		try {
			HWPFDocument wordDocument = new HWPFDocument(input);
			WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
			wordToHtmlConverter.processDocument(wordDocument);
			Document htmlDocument = wordToHtmlConverter.getDocument();
			ByteArrayOutputStream outStream = new ByteArrayOutputStream();
			DOMSource domSource = new DOMSource(htmlDocument);
			StreamResult streamResult = new StreamResult(outStream);

			TransformerFactory tf = TransformerFactory.newInstance();
			Transformer serializer = tf.newTransformer();
			serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
			serializer.setOutputProperty(OutputKeys.INDENT, "yes");
			serializer.setOutputProperty(OutputKeys.METHOD, "html");
			serializer.transform(domSource, streamResult);
			outStream.close();

			// Decode with the same charset the serializer was told to write.
			content = new String(outStream.toByteArray(), "UTF-8");
		} finally {
			// Release the source file handle on success and on failure alike,
			// and before the output file is written.
			input.close();
		}
		stringToFile(content,htmlfile);
	} catch (Exception e) {
		// Best-effort conversion: failures are reported on stderr only.
		e.printStackTrace();
	}
}
 
Example #3
Source File: OfficeHtmlUtil.java | Project: jeecg | License: Apache License 2.0 | 5 votes
/**
 * Converts a Word file to HTML.
 * <p>
 * Reads the file at {@code docfile}, renders it with
 * {@code WordToHtmlConverter}, serializes the resulting DOM as indented
 * UTF-8 HTML, and passes that text to
 * {@code stringToFile(content, htmlfile)}.
 * <p>
 * Exceptions are not propagated: every {@code Exception} is caught and its
 * stack trace is printed.
 *
 * @param docfile
 *            full path of the Word file
 * @param htmlfile
 *            path where the converted HTML is stored
 * added by duanql	2013-07-17
 */
public void WordConverterHtml(String docfile, String htmlfile){
	try {
		String content;
		InputStream input = new FileInputStream(docfile);
		try {
			HWPFDocument wordDocument = new HWPFDocument(input);
			WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
			wordToHtmlConverter.processDocument(wordDocument);
			Document htmlDocument = wordToHtmlConverter.getDocument();
			ByteArrayOutputStream outStream = new ByteArrayOutputStream();
			DOMSource domSource = new DOMSource(htmlDocument);
			StreamResult streamResult = new StreamResult(outStream);

			TransformerFactory tf = TransformerFactory.newInstance();
			Transformer serializer = tf.newTransformer();
			serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
			serializer.setOutputProperty(OutputKeys.INDENT, "yes");
			serializer.setOutputProperty(OutputKeys.METHOD, "html");
			serializer.transform(domSource, streamResult);
			outStream.close();

			// The bytes were written as UTF-8, so decode them as UTF-8.
			content = new String(outStream.toByteArray(), "UTF-8");
		} finally {
			// The original never closed this stream; close it on every path,
			// and before the output file is written.
			input.close();
		}
		stringToFile(content,htmlfile);
	} catch (Exception e) {
		// Deliberately best-effort: the failure is printed, not rethrown.
		e.printStackTrace();
	}
}