package com.mob.bbssdk.sample.utils; import android.util.Xml; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFCellStyle; import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.hssf.usermodel.HSSFPalette; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.util.HSSFColor; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.converter.PicturesManager; import org.apache.poi.hwpf.converter.WordToHtmlConverter; import org.apache.poi.hwpf.usermodel.PictureType; import org.apache.poi.ss.util.CellRangeAddress; import org.w3c.dom.Document; import org.xmlpull.v1.XmlPullParser; import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.text.DecimalFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Locale; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; public class OfficeConverter { private String filePath; private String extension; private String htmlPath; private String imagePath; public OfficeConverter(String filePath, String extension, String htmlPath, String imagePath) { this.filePath = filePath; this.extension = extension; this.htmlPath = htmlPath; this.imagePath = imagePath; } private void docToHtml() throws Throwable { HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(filePath)); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()); //设置图片路径 wordToHtmlConverter.setPicturesManager(new PicturesManager() { public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) { //保存图片 String picPath = imagePath + File.separator + suggestedName; try { OutputStream out = new FileOutputStream(picPath); out.write(content); out.close(); } catch (Throwable t) { t.printStackTrace(); } return picPath; } }); wordToHtmlConverter.processDocument(wordDocument); Document htmlDocument = wordToHtmlConverter.getDocument(); OutputStream out = new FileOutputStream(htmlPath); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); out.close(); } public void convertToHtml() throws Throwable { if ("doc".equals(extension)) { docToHtml(); } else if ("docx".equals(extension)) { docxToHtml(); } else if ("xls".equals(extension)) { xlsToHtml(); } else if ("xlsx".equals(extension)) { xlsxToHtml(); } else if ("txt".equals(extension)) { txtToHtml(); } } private void docxToHtml() throws Throwable { FileOutputStream output = new FileOutputStream(new File(htmlPath));// new一个流,目标为html文件 String head = "<!DOCTYPE><html><meta charset=\"utf-8\"><body>";// 定义头文件,我在这里加了utf-8,不然会出现乱码 String end = "</body></html>"; String tagBegin = "<p>";// 段落开始,标记开始? String tagEnd = "</p>";// 段落结束 String tableBegin = "<table style=\"border-collapse:collapse\" border=1 bordercolor=\"black\">"; String tableEnd = "</table>"; String rowBegin = "<tr>"; String rowEnd = "</tr>"; String colBegin = "<td>"; String colEnd = "</td>"; output.write(head.getBytes());// 写如头部 ZipFile file = new ZipFile(new File(filePath)); ZipEntry sharedStringXML = file.getEntry("word/document.xml"); InputStream inputStream = file.getInputStream(sharedStringXML); XmlPullParser xmlParser = Xml.newPullParser(); xmlParser.setInput(inputStream, "utf-8"); int evtType = xmlParser.getEventType(); boolean isTable = false; // 是表格 用来统计 列 行 数 boolean isSize = false; // 大小状态 boolean isColor = false; // 颜色状态 boolean isCenter = false; // 居中状态 boolean isRight = false; // 居右状态 boolean isItalic = false; // 是斜体 boolean isUnderline = false; // 是下划线 boolean isBold = false; // 加粗 boolean isR = false; // 在那个r中 int pictureIndex = 1; // docx 压缩包中的图片名 image1 开始 所以索引从1开始 while (evtType != XmlPullParser.END_DOCUMENT) { switch (evtType) { // 开始标签 case XmlPullParser.START_TAG: { String tag = xmlParser.getName(); if (tag.equalsIgnoreCase("r")) { isR = true; } if (tag.equalsIgnoreCase("u")) { // 判断下划线 isUnderline = true; } if (tag.equalsIgnoreCase("jc")) { // 判断对齐方式 String align = xmlParser.getAttributeValue(0); if (align.equals("center")) { output.write("<center>".getBytes()); isCenter = true; } if (align.equals("right")) { output.write("<div align=\"right\">".getBytes()); isRight = true; } } if (tag.equalsIgnoreCase("color")) { // 判断颜色 String color = xmlParser.getAttributeValue(0); output.write(("<span style=\"color:" + color + ";\">").getBytes()); isColor = true; } if (tag.equalsIgnoreCase("sz")) { // 判断大小 if (isR) { int size = decideSize(Integer.valueOf(xmlParser.getAttributeValue(0))); output.write(("<font size=" + size + ">").getBytes()); isSize = true; } } // 下面是表格处理 if (tag.equalsIgnoreCase("tbl")) { // 检测到tbl 表格开始 output.write(tableBegin.getBytes()); isTable = true; } if (tag.equalsIgnoreCase("tr")) { // 行 output.write(rowBegin.getBytes()); } if (tag.equalsIgnoreCase("tc")) { // 列 output.write(colBegin.getBytes()); } if (tag.equalsIgnoreCase("pic")) { // 检测到标签 pic 图片 String entryNameJPG = "word/media/image" + pictureIndex + ".jpeg"; String entryNamePNG = "word/media/image" + pictureIndex + ".png"; String entryNameGIF = "word/media/image" + pictureIndex + ".gif"; String entryNameWMF = "word/media/image" + pictureIndex + ".wmf"; String suffix = ".jpg"; ZipEntry sharePicture = file.getEntry(entryNameJPG); // 一下为读取docx的图片 转化为流数组 if (sharePicture == null) { suffix = ".png"; sharePicture = file.getEntry(entryNamePNG); } if (sharePicture == null) { suffix = ".gif"; sharePicture = file.getEntry(entryNameGIF); } if (sharePicture == null) { suffix = ".wmf"; sharePicture = file.getEntry(entryNameWMF); } if (sharePicture != null) { InputStream pictIS = file.getInputStream(sharePicture); String picPath = imagePath + "/" + pictureIndex + suffix; FileOutputStream pOut = new FileOutputStream(new File(picPath)); byte[] buffer = new byte[512]; int len; while ((len = pictIS.read(buffer)) != -1) { pOut.write(buffer, 0, len); } pictIS.close(); pOut.close(); String imageString = "<img src=\"" + picPath + "\">"; try { output.write(imageString.getBytes()); } catch (Throwable t) { t.printStackTrace(); } } // 转换一张后 索引+1 pictureIndex++; } if (tag.equalsIgnoreCase("b")) { // 检测到加粗标签 isBold = true; } if (tag.equalsIgnoreCase("p")) {// 检测到 p 标签 if (!isTable) { // 如果在表格中 就无视 output.write(tagBegin.getBytes()); } } if (tag.equalsIgnoreCase("i")) { // 斜体 isItalic = true; } // 检测到值 标签 if (tag.equalsIgnoreCase("t")) { if (isBold) { // 加粗 output.write("<b>".getBytes()); } if (isUnderline) { // 检测到下划线标签,输入<u> output.write("<u>".getBytes()); } if (isItalic) { // 检测到斜体标签,输入<i> output.write("<i>".getBytes()); } output.write(xmlParser.nextText().getBytes()); // 写入数值 if (isItalic) { // 检测到斜体标签,在输入值之后,输入</i>,并且斜体状态=false output.write("</i>".getBytes()); isItalic = false; } if (isUnderline) {// 检测到下划线标签,在输入值之后,输入</u>,并且下划线状态=false output.write("</u>".getBytes()); isUnderline = false; } if (isBold) { // 加粗 output.write("</b>".getBytes()); isBold = false; } if (isSize) { // 检测到大小设置,输入结束标签 output.write("</font>".getBytes()); isSize = false; } if (isColor) { // 检测到颜色设置存在,输入结束标签 output.write("</span>".getBytes()); isColor = false; } if (isCenter) { // 检测到居中,输入结束标签 output.write("</center>".getBytes()); isCenter = false; } if (isRight) { // 居右不能使用<right></right>,使用div可能会有状况,先用着 output.write("</div>".getBytes()); isRight = false; } } } break; // 结束标签 case XmlPullParser.END_TAG: { String tag2 = xmlParser.getName(); if (tag2.equalsIgnoreCase("tbl")) { // 检测到表格结束,更改表格状态 output.write(tableEnd.getBytes()); isTable = false; } if (tag2.equalsIgnoreCase("tr")) { // 行结束 output.write(rowEnd.getBytes()); } if (tag2.equalsIgnoreCase("tc")) { // 列结束 output.write(colEnd.getBytes()); } if (tag2.equalsIgnoreCase("p")) { // p结束,如果在表格中就无视 if (!isTable) { output.write(tagEnd.getBytes()); } } if (tag2.equalsIgnoreCase("r")) { isR = false; } } break; } evtType = xmlParser.next(); } output.write(end.getBytes()); output.close(); } private void xlsToHtml() throws Throwable { FileOutputStream output = new FileOutputStream(new File(htmlPath)); StringBuffer htmlHeaderSB = new StringBuffer(); htmlHeaderSB.append("<html xmlns:o='urn:schemas-microsoft-com:office:office' xmlns:x='urn:schemas-microsoft-com:office:excel' " + "xmlns='http://www.w3.org/TR/REC-html40'>"); htmlHeaderSB.append("<head><meta http-equiv=Content-Type content='text/html; charset=utf-8'><meta name=ProgId content=Excel.Sheet>" + "</head><body>"); output.write(htmlHeaderSB.toString().getBytes()); HSSFSheet sheet; HSSFWorkbook workbook = new HSSFWorkbook(new FileInputStream(filePath)); // 获整个Excel for (int sheetIndex = 0; sheetIndex < workbook.getNumberOfSheets(); sheetIndex++) { if (workbook.getSheetAt(sheetIndex) != null) { sheet = workbook.getSheetAt(sheetIndex);// 获得不为空的这个sheet if (sheet != null) { int firstRowNum = sheet.getFirstRowNum(); // 第一行 int lastRowNum = sheet.getLastRowNum(); // 最后一行 // 构造Table output.write(("<table width=\"100%\" style=\"border:1px solid #000;border-width:1px 0 0 1px;margin:2px 0 2px 0;" + "border-collapse:collapse;\">").getBytes()); for (int rowNum = firstRowNum; rowNum <= lastRowNum; rowNum++) { if (sheet.getRow(rowNum) != null) {// 如果行不为空, HSSFRow row = sheet.getRow(rowNum); short firstCellNum = row.getFirstCellNum(); // 该行的第一个单元格 short lastCellNum = row.getLastCellNum(); // 该行的最后一个单元格 int height = (int) (row.getHeight() / 15.625); // 行的高度 output.write(("<tr height=\"" + height + "\" style=\"border:1px solid #000;border-width:0 1px 1px 0;" + "margin:2px 0 2px 0;\">").getBytes()); for (short cellNum = firstCellNum; cellNum <= lastCellNum; cellNum++) { // 循环该行的每一个单元格 HSSFCell cell = row.getCell(cellNum); if (cell != null) { if (cell.getCellType() != HSSFCell.CELL_TYPE_BLANK) { StringBuffer tdStyle = new StringBuffer("<td style=\"border:1px solid #000; border-width:0 1px 1px 0;" + "margin:2px 0 2px 0; "); HSSFCellStyle cellStyle = cell.getCellStyle(); HSSFPalette palette = workbook.getCustomPalette(); // 类HSSFPalette用于求颜色的国际标准形式 HSSFColor hColor = palette.getColor(cellStyle.getFillForegroundColor()); HSSFColor hColor2 = palette.getColor(cellStyle.getFont(workbook).getColor()); String bgColor = convertToStardColor(hColor);// 背景颜色 short boldWeight = cellStyle.getFont(workbook).getBoldweight(); // 字体粗细 short fontHeight = (short) (cellStyle.getFont(workbook).getFontHeight() / 2); // 字体大小 String fontColor = convertToStardColor(hColor2); // 字体颜色 if (bgColor != null && !"".equals(bgColor.trim())) { tdStyle.append(" background-color:"); tdStyle.append(bgColor); tdStyle.append("; "); } if (fontColor != null && !"".equals(fontColor.trim())) { tdStyle.append(" color:"); tdStyle.append(fontColor); tdStyle.append("; "); } tdStyle.append(" font-weight:"); tdStyle.append(boldWeight); tdStyle.append("; "); tdStyle.append(" font-size: "); tdStyle.append(fontHeight); tdStyle.append("%;"); output.write((tdStyle + "\"").getBytes()); int width = (int) (sheet.getColumnWidth(cellNum) / 35.7); // int cellRegionCol = getMergerCellRegionCol(sheet, rowNum, cellNum); // 合并的列(solspan) int cellRegionRow = getMergerCellRegionRow(sheet, rowNum, cellNum);// 合并的行(rowspan) String align = convertAlignToHtml(cellStyle.getAlignment()); // String vAlign = convertVerticalAlignToHtml(cellStyle.getVerticalAlignment()); output.write((" align=\"" + align + "\" valign=\"" + vAlign + "\" width=\"" + width + "\" ").getBytes()); output.write((" colspan=\"" + cellRegionCol + "\" rowspan=\"" + cellRegionRow + "\"").getBytes()); output.write((">" + getCellValue(cell) + "</td>").getBytes()); } } } output.write("</tr>".getBytes()); } } output.write(("</table>").getBytes()); } } } output.write(("</body></html>").getBytes()); output.close(); } private void txtToHtml() throws Throwable { FileOutputStream output = new FileOutputStream(new File(htmlPath)); String head = "<!DOCTYPE><html><head><meta charset=\"utf-8\"><meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">" + "<meta name=\"viewport\" content=\"initial-scale=1, maximum-scale=1\">" + "</head><body style=\"background:#fff padding:10px,10px,10px,10px\"><div style=\"font-size:.66667rem color:#666\">"; String end = "</div></body></html>"; output.write(head.getBytes()); InputStreamReader isr = new InputStreamReader(new FileInputStream(filePath), getCharset(filePath)); BufferedReader br = new BufferedReader(isr); String line; while ((line = br.readLine()) != null) { output.write(line.getBytes()); output.write("<br />".getBytes("UTF-8")); } br.close(); isr.close(); output.write(end.getBytes()); output.close(); } private String getCharset(String fileName) throws IOException { BufferedInputStream bin = new BufferedInputStream(new FileInputStream(fileName)); int p = (bin.read() << 8) + bin.read(); String code = "GBK"; switch (p) { case 0xefbb: { code = "UTF-8"; } break; case 0xfffe: { code = "Unicode"; } break; case 0xfeff: { code = "UTF-16BE"; } break; } return code; } private void xlsxToHtml() throws Throwable { FileOutputStream output = new FileOutputStream(new File(htmlPath)); String head = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"\"http://www.w3.org/TR/html4/loose.dtd\">" + "<html><meta charset=\"utf-8\"><head></head><body>";// 定义头文件,我在这里加了utf-8,不然会出现乱码 String tableBegin = "<table style=\"border-collapse:collapse\" border=1 bordercolor=\"black\">"; String tableEnd = "</table>"; String rowBegin = "<tr>"; String rowEnd = "</tr>"; String colBegin = "<td>"; String colEnd = "</td>"; String end = "</body></html>"; output.write(head.getBytes()); output.write(tableBegin.getBytes()); String v = null; boolean flat = false; List<String> ls = new ArrayList<String>(); ZipFile file = new ZipFile(new File(filePath));// 地址 ZipEntry sharedStringXML = file.getEntry("xl/sharedStrings.xml");// 共享字符串 InputStream inputStream = file.getInputStream(sharedStringXML);// 输入流 目标上面的共享字符串 XmlPullParser xmlParser = Xml.newPullParser();// new 解析器 xmlParser.setInput(inputStream, "utf-8");// 设置解析器类型 int evtType = xmlParser.getEventType();// 获取解析器的事件类型 while (evtType != XmlPullParser.END_DOCUMENT) {// 如果不等于 文档结束 switch (evtType) { case XmlPullParser.START_TAG: {// 标签开始 String tag = xmlParser.getName(); if (tag.equalsIgnoreCase("t")) { ls.add(xmlParser.nextText()); } } break; case XmlPullParser.END_TAG: { // 标签结束 } break; } evtType = xmlParser.next(); } ZipEntry sheetXML = file.getEntry("xl/worksheets/sheet1.xml"); InputStream inputStreamsheet = file.getInputStream(sheetXML); XmlPullParser xmlParsersheet = Xml.newPullParser(); xmlParsersheet.setInput(inputStreamsheet, "utf-8"); int evtTypesheet = xmlParsersheet.getEventType(); output.write(rowBegin.getBytes()); while (evtTypesheet != XmlPullParser.END_DOCUMENT) { switch (evtTypesheet) { case XmlPullParser.START_TAG: {// 标签开始 String tag = xmlParsersheet.getName(); if (!tag.equalsIgnoreCase("row")) { if (tag.equalsIgnoreCase("c")) { String t = xmlParsersheet.getAttributeValue(null, "t"); if (t != null) { flat = true; } else {// 没有数据时 找了我n年,终于找到了 输入<td></td> 表示空格 output.write(colBegin.getBytes()); output.write(colEnd.getBytes()); flat = false; } } else { if (tag.equalsIgnoreCase("v")) { v = xmlParsersheet.nextText(); output.write(colBegin.getBytes()); if (v != null) { String str; if (flat) { str = ls.get(Integer.parseInt(v)); } else { str = v; } output.write(str.getBytes()); output.write(colEnd.getBytes()); } } } } } break; case XmlPullParser.END_TAG: { if (xmlParsersheet.getName().equalsIgnoreCase("row") && v != null) { output.write(rowBegin.getBytes()); } } break; } evtTypesheet = xmlParsersheet.next(); } output.write(rowEnd.getBytes()); output.write(tableEnd.getBytes()); output.write(end.getBytes()); output.close(); } /** * 取得单元格的值 */ private static Object getCellValue(HSSFCell cell) throws Throwable { Object value = ""; if (cell.getCellType() == HSSFCell.CELL_TYPE_STRING) { value = cell.getRichStringCellValue().toString(); } else if (cell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) { if (HSSFDateUtil.isCellDateFormatted(cell)) { Date date = cell.getDateCellValue(); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd", Locale.CHINA); value = sdf.format(date); } else { DecimalFormat format = new DecimalFormat("#0.###"); value = format.format(cell.getNumericCellValue()); } } if (cell.getCellType() == HSSFCell.CELL_TYPE_BLANK) { value = ""; } return value; } /** * 判断单元格在不在合并单元格范围内,如果是,获取其合并的列数。 */ private static int getMergerCellRegionCol(HSSFSheet sheet, int cellRow, int cellCol) throws Throwable { int retVal = 0; int sheetMergerCount = sheet.getNumMergedRegions(); for (int i = 0; i < sheetMergerCount; i++) { CellRangeAddress cra = sheet.getMergedRegion(i); int firstRow = cra.getFirstRow(); // 合并单元格CELL起始行 int firstCol = cra.getFirstColumn(); // 合并单元格CELL起始列 int lastRow = cra.getLastRow(); // 合并单元格CELL结束行 int lastCol = cra.getLastColumn(); // 合并单元格CELL结束列 if (cellRow >= firstRow && cellRow <= lastRow) { // 判断该单元格是否是在合并单元格中 if (cellCol >= firstCol && cellCol <= lastCol) { retVal = lastCol - firstCol + 1; // 得到合并的列数 break; } } } return retVal; } /** * 判断单元格是否是合并的单格,如果是,获取其合并的行数。 */ private static int getMergerCellRegionRow(HSSFSheet sheet, int cellRow, int cellCol) throws Throwable { int retVal = 0; int sheetMergerCount = sheet.getNumMergedRegions(); for (int i = 0; i < sheetMergerCount; i++) { CellRangeAddress cra = sheet.getMergedRegion(i); int firstRow = cra.getFirstRow(); // 合并单元格CELL起始行 int firstCol = cra.getFirstColumn(); // 合并单元格CELL起始列 int lastRow = cra.getLastRow(); // 合并单元格CELL结束行 int lastCol = cra.getLastColumn(); // 合并单元格CELL结束列 if (cellRow >= firstRow && cellRow <= lastRow) { // 判断该单元格是否是在合并单元格中 if (cellCol >= firstCol && cellCol <= lastCol) { retVal = lastRow - firstRow + 1; // 得到合并的行数 break; } } } return retVal; } /** * 单元格背景色转换 */ private String convertToStardColor(HSSFColor hc) { StringBuffer sb = new StringBuffer(""); if (hc != null) { int a = HSSFColor.AUTOMATIC.index; int b = hc.getIndex(); if (a == b) { return null; } sb.append("#"); for (int i = 0; i < hc.getTriplet().length; i++) { String str; String strTmp = Integer.toHexString(hc.getTriplet()[i]); if (strTmp != null && strTmp.length() < 2) { str = "0" + strTmp; } else { str = strTmp; } sb.append(str); } } return sb.toString(); } /** * 单元格小平对齐 */ private String convertAlignToHtml(short alignment) { String align = "left"; switch (alignment) { case HSSFCellStyle.ALIGN_LEFT: { align = "left"; } break; case HSSFCellStyle.ALIGN_CENTER: { align = "center"; } break; case HSSFCellStyle.ALIGN_RIGHT: { align = "right"; } break; } return align; } /** * 单元格垂直对齐 */ private String convertVerticalAlignToHtml(short verticalAlignment) { String align = "middle"; switch (verticalAlignment) { case HSSFCellStyle.VERTICAL_BOTTOM: { align = "bottom"; } break; case HSSFCellStyle.VERTICAL_CENTER: { align = "center"; } break; case HSSFCellStyle.VERTICAL_TOP: { align = "top"; } break; } return align; } private int decideSize(int size) { if (size >= 1 && size <= 8) { return 1; } if (size >= 9 && size <= 11) { return 2; } if (size >= 12 && size <= 14) { return 3; } if (size >= 15 && size <= 19) { return 4; } if (size >= 20 && size <= 29) { return 5; } if (size >= 30 && size <= 39) { return 6; } if (size >= 40) { return 7; } return 3; } }