org.apache.poi.hwpf.HWPFDocument Java Examples

The following examples show how to use org.apache.poi.hwpf.HWPFDocument. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: WordUtil.java    From javatech with Creative Commons Attribution Share Alike 4.0 International 7 votes vote down vote up
/**
 * Rewrites the summary properties (author, last author, company, document version)
 * of the Word document at {@code filename} and saves the document back to the same path.
 *
 * @param filename path of the .doc file to update in place
 * @throws IOException if the file cannot be read or written
 */
public static void setDocProperties(String filename) throws IOException {
    System.out.println("filename = [" + filename + "]");
    File file = new File(filename);

    // try-with-resources guarantees the stream and the document are released
    // even when reading, editing or writing fails part-way through.
    try (FileInputStream fis = new FileInputStream(file);
         HWPFDocument doc = new HWPFDocument(fis)) {

        // Documents without summary property sets would otherwise yield null below.
        doc.createInformationProperties();

        SummaryInformation summaryInformation = doc.getSummaryInformation();
        summaryInformation.setAuthor("张鹏");
        summaryInformation.setLastAuthor("张鹏");
        DocumentSummaryInformation documentSummaryInformation = doc.getDocumentSummaryInformation();
        documentSummaryInformation.setCompany("张鹏");
        documentSummaryInformation.setDocumentVersion("1");

        // The output stream is opened only after the document has been constructed
        // from the input stream, because it targets the very same file.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            doc.write(fos);
        }
    }
}
 
Example #2
Source File: WordUtils.java    From job with MIT License 7 votes vote down vote up
/**
 * Converts a binary Word (.doc) document into an HTML string.
 *
 * <p>The stream is always closed before this method returns, whether or not the
 * conversion succeeds.
 *
 * @param input   stream positioned at the start of the .doc content
 * @param charset name of the encoding used both to serialize the HTML and to decode
 *                the serialized bytes into the returned string
 * @return the HTML markup produced for the document
 * @throws Exception if the document cannot be read or transformed
 */
public static String parseDoc2Html(InputStream input, String charset) throws Exception {
  ByteArrayOutputStream output = new ByteArrayOutputStream();
  try {
    // Constructed inside the try so the stream is closed even if parsing fails.
    HWPFDocument wordDocument = new HWPFDocument(input);
    Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    WordToHtmlConverter converter = new WordToHtmlConverter(doc);
    converter.processDocument(wordDocument);

    DOMSource domSource = new DOMSource(converter.getDocument());
    StreamResult streamResult = new StreamResult(output);
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, charset);
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.METHOD, "html");
    serializer.transform(domSource, streamResult);
  } finally {
    input.close();
  }

  // Decode with the same charset the serializer wrote with; relying on the platform
  // default here garbles non-ASCII text whenever the two differ.
  return output.toString(charset);
}
 
Example #3
Source File: FormFieldTests.java    From kbase-doc with Apache License 2.0 6 votes vote down vote up
@Test
    public void testDoc() throws IOException {
        // Prints every paragraph of a sample contract document, each followed by a separator line.
        // The stream is opened in try-with-resources so the file handle is released
        // even if parsing or printing fails.
        try (FileInputStream in = new FileInputStream("D:\\Xiaoi\\Items\\2019-07-02 合同智能分析工具\\04_现场数据\\4.25国网北京信通公司110kV半壁店站等63个站点通信蓄电池改造勘察设计合同.doc")) {
            HWPFDocument document = new HWPFDocument(in);
//        for ( FieldsDocumentPart part : FieldsDocumentPart.values() ) {
//            System.out.println( "=== Document part: " + part + " ===" );
//            for ( Field field : document.getFields().getFields( part ) ) {
//                System.out.println(field.firstSubrange(document.getRange()).getParagraph(0).text());
//            }
//        }

            // The paragraph count does not change while printing; read it once.
            int paragraphCount = document.getRange().numParagraphs();
            for (int i = 0; i < paragraphCount; i++) {
                Paragraph paragraph = document.getRange().getParagraph(i);
                System.out.println(paragraph.text());
                System.out.println("===============================================");
            }

//        for (Field field : document.getFields().getFields(FieldsDocumentPart.MAIN)){
//            System.out.println(field);
//            System.out.println(field.firstSubrange(document.getRange()).getParagraph(0).text());
//        }
//        System.out.println(document.getRange().getParagraph(0).text());
//        System.out.println(document.getMainTextboxRange().getSection(0).text());
        }
    }
 
Example #4
Source File: DocProducer.java    From OfficeProducer with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a Word document from a template and saves it.
 *
 * <p>For every entry in {@code parameters}, each occurrence of the placeholder
 * <code>${key}</code> in the template's range is replaced with the entry's value.
 *
 * @param templatePath resource path of the template .doc, resolved through
 *                     {@code DocProducer.class.getResourceAsStream}
 * @param parameters   placeholder names and their replacement values
 * @param savePath     file path the generated document is written to
 * @throws IllegalArgumentException if no resource exists at {@code templatePath}
 * @throws Exception                if the template cannot be read or the result cannot be written
 */
public static void CreateDocFromTemplate(String templatePath,
                                         HashMap<String, String> parameters,
                                         //HashMap<String, String> imageParameters,
                                         String savePath)
        throws Exception {
    @Cleanup InputStream is = DocProducer.class.getResourceAsStream(templatePath);
    // getResourceAsStream returns null for a missing resource; fail with a clear
    // message instead of an obscure error from inside the document parser.
    if (is == null) {
        throw new IllegalArgumentException("Template resource not found: " + templatePath);
    }
    HWPFDocument doc = new HWPFDocument(is);
    Range range = doc.getRange();

    // Replace every ${key} placeholder inside the range with its value.
    for (Map.Entry<String, String> next : parameters.entrySet()) {
        range.replaceText("${" + next.getKey() + "}",
                next.getValue()
        );
    }

    @Cleanup OutputStream os = new FileOutputStream(savePath);
    // Write the document to the output stream.
    doc.write(os);
}
 
Example #5
Source File: OfficeHtmlUtil.java    From jeewx with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Word (.doc) file to HTML and stores the result via {@code stringToFile}.
 *
 * <p>Failures are not propagated: any exception raised during the conversion is
 * printed to standard error and the method returns normally.
 *
 * @param docfile
 *            full path of the Word file to convert
 * @param htmlfile
 *            path where the generated HTML is stored
 * add by duanql	2013-07-17
 */
public void WordConverterHtml(String docfile, String htmlfile){
	// try-with-resources releases the file handle on success and on failure alike.
	try (InputStream input = new FileInputStream(docfile)) {
		HWPFDocument wordDocument = new HWPFDocument(input);
		WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
		wordToHtmlConverter.processDocument(wordDocument);
		Document htmlDocument = wordToHtmlConverter.getDocument();

		ByteArrayOutputStream outStream = new ByteArrayOutputStream();
		DOMSource domSource = new DOMSource(htmlDocument);
		StreamResult streamResult = new StreamResult(outStream);

		TransformerFactory tf = TransformerFactory.newInstance();
		Transformer serializer = tf.newTransformer();
		serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
		serializer.setOutputProperty(OutputKeys.INDENT, "yes");
		serializer.setOutputProperty(OutputKeys.METHOD, "html");
		serializer.transform(domSource, streamResult);

		// Decode with the same encoding the serializer was configured to write.
		String content = outStream.toString("UTF-8");
		stringToFile(content,htmlfile);
	} catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example #6
Source File: MSOfficeBox.java    From wandora with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the text of the Word document as an array holding one String per paragraph.
 * If the paragraph-based extraction throws, the result is a single-element array
 * containing whatever {@code getWordTextFromPieces} returns for the document.
 */
public static String[] getWordParagraphText(HWPFDocument doc) {
	try {
		// Preferred path: walk the paragraphs exposed by the document range.
		Range range = doc.getRange();
		String[] paragraphs = new String[range.numParagraphs()];
		for (int idx = 0; idx < paragraphs.length; idx++) {
			String text = range.getParagraph(idx).text();
			// Complete a bare carriage return into a "\r\n" line ending.
			paragraphs[idx] = text.endsWith("\r") ? text + "\n" : text;
		}
		return paragraphs;
	} catch (Exception e) {
		// Paragraph extraction failed; fall back to the raw text pieces.
		return new String[] { getWordTextFromPieces(doc) };
	}
}
 
Example #7
Source File: MSOfficeBox.java    From wandora with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the document text by concatenating, in order, every paragraph string
 * returned by {@code getWordParagraphText}.
 */
public static String getWordTextOld(HWPFDocument doc) {
	StringBuilder joined = new StringBuilder();
	for (String paragraph : getWordParagraphText(doc)) {
		joined.append(paragraph);
	}
	return joined.toString();
}
 
Example #8
Source File: OfficeHtmlUtil.java    From jeecg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Word (.doc) file to HTML and stores the result via {@code stringToFile}.
 *
 * <p>Failures are not propagated: any exception raised during the conversion is
 * printed to standard error and the method returns normally.
 *
 * @param docfile
 *            full path of the Word file to convert
 * @param htmlfile
 *            path where the generated HTML is stored
 * add by duanql	2013-07-17
 */
public void WordConverterHtml(String docfile, String htmlfile){
	// try-with-resources releases the file handle on success and on failure alike.
	try (InputStream input = new FileInputStream(docfile)) {
		HWPFDocument wordDocument = new HWPFDocument(input);
		WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
		wordToHtmlConverter.processDocument(wordDocument);
		Document htmlDocument = wordToHtmlConverter.getDocument();

		ByteArrayOutputStream outStream = new ByteArrayOutputStream();
		DOMSource domSource = new DOMSource(htmlDocument);
		StreamResult streamResult = new StreamResult(outStream);

		TransformerFactory tf = TransformerFactory.newInstance();
		Transformer serializer = tf.newTransformer();
		serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
		serializer.setOutputProperty(OutputKeys.INDENT, "yes");
		serializer.setOutputProperty(OutputKeys.METHOD, "html");
		serializer.transform(domSource, streamResult);

		// Decode with the same encoding the serializer was configured to write.
		String content = outStream.toString("UTF-8");
		stringToFile(content,htmlfile);
	} catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example #9
Source File: WordUtils.java    From java-tutorial with MIT License 4 votes vote down vote up
/**
 * Fills a Word template with data and writes the result as a new Word file.
 *
 * <p>The output has the same format as the template: a .doc template produces a
 * .doc file and a .docx template produces a .docx file. Templates with any other
 * suffix leave the (freshly created, empty) output file untouched.
 *
 * <p>Empty-string values in {@code mapData} are replaced in place with {@code "-"}
 * before filling, because empty values cannot be rendered in the output.
 *
 * <p>Errors raised while filling or writing are printed to standard error and are
 * not propagated.
 *
 * @param mapData    data used to fill the template; modified in place as described above
 * @param templePath path of the Word template
 * @param outPutPath path of the file to generate; an existing file is replaced
 * @throws Exception if the output file cannot be created
 */
public static void generateWord(HashMap mapData, String templePath, String outPutPath) throws Exception {
    // Substitute "-" for empty values. Entry.setValue updates the map through the
    // iterator's own entry instead of calling put() on the map being iterated.
    @SuppressWarnings("unchecked") // the raw map is only read as Object keys/values
    Map<Object, Object> data = mapData;
    for (Map.Entry<Object, Object> entry : data.entrySet()) {
        if ("".equals(entry.getValue())) {
            entry.setValue("-");
        }
    }

    // Create the output file first.
    File file = new File(outPutPath);
    if (file.exists()) {
        file.delete();
    }
    // getParentFile() is null when outPutPath has no directory component.
    File parent = file.getParentFile();
    if (parent != null) {
        parent.mkdirs();
    }
    file.createNewFile();

    try {
        // Pick the export path from the template's file suffix.
        String suffix = getSuffix(templePath);
        if (suffix.equalsIgnoreCase(DOCX_SUFFIX)) {
            XWPFDocument doc = WordExportUtil.exportWord07(templePath, mapData);
            try (FileOutputStream fos = new FileOutputStream(file)) {
                doc.write(fos);
            }
        } else if (suffix.equalsIgnoreCase(DOC_SUFFIX)) {
            // Both streams are closed even if filling or writing fails.
            try (FileInputStream template = new FileInputStream(templePath)) {
                HWPFDocument hwpfDocument = new HWPFDocument(template);
                Range range = hwpfDocument.getRange();
                getRange(range, mapData);
                try (FileOutputStream stream = new FileOutputStream(file)) {
                    hwpfDocument.write(stream);
                    stream.flush();
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example #10
Source File: WatermarkServiceImpl.java    From kbase-doc with Apache License 2.0 4 votes vote down vote up
/**
 * Adds a text watermark to the default header of an {@code XWPFDocument}, then sets
 * the watermark shape's fill color and rotates it. Any other kind of object,
 * including {@code HWPFDocument}, is left untouched.
 *
 * @param obj       the document object to watermark
 * @param watermark the watermark text
 * @param color     fill color applied to the watermark shape
 */
private void addWaterMark(Object obj, String watermark, String color) {
	// Only XWPF documents are handled; nothing is done for anything else.
	if (!(obj instanceof XWPFDocument)) {
		return;
	}
	XWPFDocument document = (XWPFDocument) obj;

	// Reuse the existing header/footer policy, creating one when the document has none.
	XWPFHeaderFooterPolicy policy = document.getHeaderFooterPolicy();
	if (policy == null) {
		policy = document.createHeaderFooterPolicy();
	}

	// The default watermark is filled black and not rotated.
	policy.createWatermark(watermark);

	// createWatermark also sets the FIRST and EVEN headers,
	// but only the DEFAULT header is adjusted here.
	XWPFParagraph firstParagraph = policy.getHeader(XWPFHeaderFooterPolicy.DEFAULT).getParagraphArray(0);

	// Locate the com.microsoft.schemas.vml.CTShape that carries fill color and rotation.
	QName shapeName = new QName("urn:schemas-microsoft-com:vml", "shape");
	XmlObject[] shapes = firstParagraph.getCTP().getRArray(0).getPictArray(0).selectChildren(shapeName);
	if (shapes.length == 0) {
		return;
	}

	com.microsoft.schemas.vml.CTShape shape = (com.microsoft.schemas.vml.CTShape) shapes[0];
	shape.setFillcolor(color);
	shape.setStyle(shape.getStyle() + ";rotation:315");
}
 
Example #11
Source File: MSOfficeBox.java    From wandora with GNU General Public License v3.0 3 votes vote down vote up
/**
 * Reads a Word document from the stream and returns its paragraph-based text.
 * If anything fails, the stack trace is printed and {@code null} is returned.
 */
public static String getWordTextOld(InputStream is) {
    String text = null;
    try {
        text = getWordTextOld(new HWPFDocument(is));
    } catch (Exception e) {
        e.printStackTrace();
    }
    return text;
}