org.apache.poi.poifs.filesystem.OfficeXmlFileException Java Examples

The following examples show how to use org.apache.poi.poifs.filesystem.OfficeXmlFileException. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: PresentationFactory.java From Quelea with GNU General Public License v3.0

6 votes

/**
 * Builds a presentation object backed by the given file.
 * <p>
 * Only files with a "ppt" or "pptx" extension are handled. The file is first
 * opened as a {@code PPTPresentation}; if that raises an
 * {@code OfficeXmlFileException}, it is opened as a {@code PPTXPresentation} instead.
 *
 * @param file the file to generate the presentation from.
 * @return the presentation object, or null (after logging a warning) if the
 * file has neither a "ppt" nor a "pptx" extension.
 * @throws IOException declared by this method for its callers.
 */
public Presentation getPresentation(File file) throws IOException {
    final boolean powerpointFile = Utils.hasExtension(file, "ppt") || Utils.hasExtension(file, "pptx");
    if(!powerpointFile) {
        LOGGER.log(Level.WARNING, "Illegal file type: {0}", file.getName());
        return null;
    }
    try {
        return new PPTPresentation(file.getAbsolutePath());
    }
    catch(OfficeXmlFileException notBinaryFormat) {
        // The extension does not decide the format: fall back to the XML-based reader
        return new PPTXPresentation(file.getAbsolutePath());
    }
}

Example #2

Source File: FileBeanParser.java From everywhere with Apache License 2.0

6 votes

/**
 * Extracts the text of a Word document supplied as a stream.
 * <p>
 * The stream's file magic decides which extractor is used: {@code WordExtractor}
 * for OLE2 data and {@code XWPFWordExtractor} for OOXML data. Any other magic
 * yields an empty string. The stream is always closed before returning.
 *
 * @param filePath path of the document; only used as the log message when an
 *                 {@code OfficeXmlFileException} is caught
 * @param is       stream holding the document bytes; closed by this method
 * @return the extracted text, or an empty string if the format is not handled
 *         or an {@code OfficeXmlFileException} was logged
 * @throws Exception any other failure raised while probing, reading or closing
 */
private static String readDoc (String filePath, InputStream is) throws Exception {
    String text= "";
    is = FileMagic.prepareToCheckMagic(is);
    try {
        // Probe the magic once and reuse the result for both branches
        FileMagic magic = FileMagic.valueOf(is);
        if (magic == FileMagic.OLE2) {
            // try-with-resources closes the extractor even when getText() throws
            try (WordExtractor ex = new WordExtractor(is)) {
                text = ex.getText();
            }
        } else if (magic == FileMagic.OOXML) {
            try (XWPFWordExtractor extractor = new XWPFWordExtractor(new XWPFDocument(is))) {
                text = extractor.getText();
            }
        }
    } catch (OfficeXmlFileException e) {
        logger.error(filePath, e);
    } finally {
        if (is != null) {
            is.close();
        }
    }
    return text;
}

Example #3

Source File: MyExcelUtil.java From seed with Apache License 2.0

5 votes

/**
 * Reads an Excel file into a list of model objects.
 * <p>
 * If the first read fails with an {@code OfficeXmlFileException}, the file is renamed
 * on disk by toggling its extension ({@code .xls} becomes {@code .xlsx} and vice versa)
 * and read once more. Some Excel files carry a 2007-format header although their name
 * ends with {@code .xls}.
 *
 * @param excelFile  the Excel file
 * @param modelClass the entity class that carries the Excel data
 * @param skipRows   row-number threshold, starting from 0: only rows whose row number is
 *                   greater than this value are kept; pass -1 to skip nothing
 * @return the rows read from the file
 * @throws OfficeXmlFileException if the first read fails and the file name ends with
 *                                neither {@code .xls} nor {@code .xlsx}
 * @throws IllegalStateException  if the file cannot be renamed for the second attempt
 * Comment by 玄玉<https://jadyer.cn/> on 2019/8/15 19:35.
 */
public static <T> List<T> read(File excelFile, Class<T> modelClass, int skipRows){
    // Initialise the SaxExcelReader
    SaxExcelReader<T> saxExcelReader = SaxExcelReader.of(modelClass);
    // Decide whether rows have to be skipped
    if(-1 < skipRows){
        saxExcelReader = saxExcelReader.rowFilter(row -> row.getRowNum() > skipRows);
    }
    try{
        // Read the file
        return saxExcelReader.read(excelFile);
    }catch (OfficeXmlFileException e){
        // The content does not match the extension: toggle the extension and retry once
        String oldFilePath = excelFile.getPath();
        String fileName = excelFile.getName();
        String newFilePath;
        if(fileName.endsWith(".xls")){
            newFilePath = oldFilePath + "x";
        }else if(fileName.endsWith(".xlsx")){
            newFilePath = oldFilePath.substring(0, oldFilePath.length()-1);
        }else{
            // No known extension to toggle: never move the caller's file to a placeholder path
            throw e;
        }
        LogUtil.getLogger().warn("文件读取失败，异常信息为：{}。现尝试修改文件后缀名再重新读取一次，新文件名为：{}", e.getMessage(), newFilePath);
        File newFile = new File(newFilePath);
        // renameTo reports failure through its return value, so it must be checked
        if(!excelFile.renameTo(newFile)){
            throw new IllegalStateException("Failed to rename " + oldFilePath + " to " + newFilePath, e);
        }
        return saxExcelReader.read(newFile);
    }
}

Example #4

Source File: VBAMacroReader.java From lams with GNU General Public License v2.0

5 votes

/**
 * Creates a reader for the given file.
 * <p>
 * The file is first opened as an {@code NPOIFSFileSystem}. If that raises an
 * {@code OfficeXmlFileException}, the file is instead passed to {@code openOOXML}
 * as a freshly opened stream.
 *
 * @param file the file to open
 * @throws IOException declared by this constructor for failures while opening the file
 */
public VBAMacroReader(File file) throws IOException {
    try {
        this.fs = new NPOIFSFileSystem(file);
    } catch (OfficeXmlFileException e) {
        // NOTE(review): the stream opened here is not closed in this constructor;
        // presumably openOOXML takes ownership of it - confirm.
        openOOXML(new FileInputStream(file));
    }
}

Example #5

Source File: ExcelPerfModeReader.java From azeroth with Apache License 2.0

5 votes

/**
 * Converts the file at the given path through {@code XLS2CSV} and returns the result.
 * <p>
 * Failures are translated: format-related exceptions and I/O exceptions become an
 * {@code ExcelOperBaseException} with a fixed message; anything else is wrapped in a
 * {@code RuntimeException}.
 *
 * @param path path handed to {@code XLS2CSV}
 * @return the list produced by {@code XLS2CSV.process()}
 */
private List<String> readAsXLS(String path) {
    try {
        return new XLS2CSV(path, -1).process();
    } catch (Exception failure) {
        // Format problems are checked first, before the generic I/O case
        final boolean wrongFormat = failure instanceof NotOLE2FileException
                || failure instanceof NotOfficeXmlFileException
                || failure instanceof OfficeXmlFileException;
        if (wrongFormat) {
            throw new ExcelOperBaseException("请选择正确格式excel文件");
        } else if (failure instanceof IOException) {
            throw new ExcelOperBaseException("文件读取失败");
        } else {
            throw new RuntimeException(failure);
        }
    }
}

Example #6

Source File: ExcelPerfModeReader.java From jeesuite-libs with Apache License 2.0

5 votes

/**
 * Runs {@code XLS2CSV} over the file at the given path and returns its output.
 * <p>
 * Exceptions are mapped as follows: the three format-related exception types and
 * {@code IOException} are replaced by an {@code ExcelOperBaseException} carrying a
 * fixed message; every other exception is wrapped in a {@code RuntimeException}.
 *
 * @param path path handed to {@code XLS2CSV}
 * @return the list produced by {@code XLS2CSV.process()}
 */
private List<String> readAsXLS(String path){
	try {
		XLS2CSV converter = new XLS2CSV(path, -1);
		List<String> lines = converter.process();
		return lines;
	} catch (Exception cause) {
		// The format check comes first, before the generic I/O case
		boolean notOle2 = cause instanceof NotOLE2FileException;
		boolean notOoxml = cause instanceof NotOfficeXmlFileException;
		boolean isOoxml = cause instanceof OfficeXmlFileException;
		if(notOle2 || notOoxml || isOoxml){
			throw new ExcelOperBaseException("请选择正确格式excel文件");
		}
		if(!(cause instanceof IOException)){
			throw new RuntimeException(cause);
		}
		throw new ExcelOperBaseException("文件读取失败");
	}
}

Example #7

Source File: MSPowerpointIndexerTest.java From carbon-apimgt with Apache License 2.0

5 votes

/**
 * Expects {@code getIndexedDocument} to fail with a {@code SolrException} when neither
 * PowerPoint reader can be constructed.
 * <p>
 * Construction of {@code POIFSFileSystem(InputStream)} is stubbed to throw
 * {@code OfficeXmlFileException}, and construction of {@code XMLSlideShow(InputStream)}
 * is stubbed to throw {@code IOException}, before the indexer is invoked.
 */
@Test(expected = SolrException.class)
public void testShouldThrowExceptionWhenFailToReadFile() throws Exception {
    // Stub: building the OLE2 file system reports an OOXML file
    PowerMockito.whenNew(POIFSFileSystem.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any(InputStream.class))
            .thenThrow(OfficeXmlFileException.class);
    // Stub: building the OOXML slide show fails as well
    PowerMockito.whenNew(XMLSlideShow.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any())
            .thenThrow(IOException.class);

    // SolrException is expected
    MSPowerpointIndexer indexer = new MSPowerpointIndexer();
    indexer.getIndexedDocument(file2Index);
}

Example #8

Source File: MSExcelIndexerTest.java From carbon-apimgt with Apache License 2.0

5 votes

/**
 * Calls {@code msExcelIndexer.getIndexedDocument} three times and expects none of the
 * calls to throw.
 * <p>
 * {@code excelExtractor.getText()} is stubbed to return text on its first invocation,
 * throw {@code OfficeXmlFileException} on the second and throw {@code Exception} on the
 * third; {@code xssfExtractor.getText()} always returns text.
 */
@Test
public void testShouldReturnIndexedDocmentWhenParameterCorrect() {
    String excelText = "excel";
    Mockito.when(excelExtractor.getText())
            .thenReturn(excelText)
            .thenThrow(OfficeXmlFileException.class)
            .thenThrow(Exception.class);
    Mockito.when(xssfExtractor.getText()).thenReturn(excelText);

    try {
        // retrieving indexed document with ExcelExtractor
        msExcelIndexer.getIndexedDocument(file2Index);

        // switching the mediaType null check
        file2Index = new AsyncIndexer.File2Index("".getBytes(),
                null, "", -1234, "");

        // retrieving indexed document with MSExcelIndexer
        // Note: .thenReturn(excelText).thenThrow(OfficeXmlFileException.class) this switches the indexer
        msExcelIndexer.getIndexedDocument(file2Index);

        // switching to silent Exception catch block
        msExcelIndexer.getIndexedDocument(file2Index);
    } catch (Exception e) {
        // Fail with the same message, but keep the caught exception as the cause so
        // the test report shows what actually went wrong
        throw new AssertionError("Should not throw any exceptions", e);
    }
}

Example #9

Source File: MSExcelIndexerTest.java From carbon-apimgt with Apache License 2.0

5 votes

/**
 * Expects {@code getIndexedDocument} to fail with a {@code SolrException} when both
 * extractors fail: {@code excelExtractor.getText()} is stubbed to throw
 * {@code OfficeXmlFileException} and {@code xssfExtractor.getText()} to throw
 * {@code IOException}.
 */
@Test(expected = SolrException.class)
public void testShouldThrowExceptionWhenErrorOccurs() {
    Mockito.when(excelExtractor.getText()).thenThrow(OfficeXmlFileException.class);
    Mockito.when(xssfExtractor.getText()).thenThrow(IOException.class);

    // SolrException is expected
    msExcelIndexer.getIndexedDocument(file2Index);
}

Example #10

Source File: MSWordIndexerTest.java From carbon-apimgt with Apache License 2.0

5 votes

/**
 * Expects {@code getIndexedDocument} to fail with a {@code SolrException} when neither
 * Word reader can be constructed.
 * <p>
 * Construction of {@code POIFSFileSystem(InputStream)} is stubbed to throw
 * {@code OfficeXmlFileException}, and construction of {@code XWPFDocument(InputStream)}
 * is stubbed to throw {@code IOException}, before the indexer is invoked.
 */
@Test(expected = SolrException.class)
public void testShouldThrowExceptionWhenFailToReadFile() throws Exception {
    // Stub: building the OLE2 file system reports an OOXML file
    PowerMockito.whenNew(POIFSFileSystem.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any(InputStream.class))
            .thenThrow(OfficeXmlFileException.class);
    // Stub: building the OOXML document fails as well
    PowerMockito.whenNew(XWPFDocument.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any())
            .thenThrow(IOException.class);

    // SolrException is expected
    MSWordIndexer indexer = new MSWordIndexer();
    indexer.getIndexedDocument(file2Index);
}

Example #11

Source File: HeaderBlock.java From lams with GNU General Public License v2.0

4 votes

/**
 * Builds a header block from raw header bytes.
 * <p>
 * A private copy of the bytes is kept. The data is rejected with a format-specific
 * exception unless its file magic is OLE2, the block size is derived from byte 30,
 * and the count/start fields are then read from their fixed offsets.
 *
 * @param data the raw header bytes
 * @throws IOException if the data is not an OLE2 document or byte 30 holds a
 *                     block size exponent other than 9 or 12
 */
private HeaderBlock(byte[] data) throws IOException {
    this._data = data.clone();

    // verify signature
    final FileMagic magic = FileMagic.valueOf(data);
    switch (magic) {
        case OLE2:
            // The only accepted format
            break;
        case OOXML:
            throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. "
                + "You are calling the part of POI that deals with OLE2 Office Documents. "
                + "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)");
        case XML:
            throw new NotOLE2FileException("The supplied data appears to be a raw XML file. "
                + "Formats such as Office 2003 XML are not supported");
        case MSWRITE:
            throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. "
                + "Apache POI doesn't currently support this format");
        case BIFF2:
        case BIFF3:
        case BIFF4:
            throw new OldExcelFormatException("The supplied data appears to be in "+magic+" format. "
                + "HSSF only supports the BIFF8 format, try OldExcelExtractor");
        default: {
            // Give a generic error if the OLE2 signature isn't found
            final String expectedSignature = HexDump.longToHex(_signature);
            final String actualSignature = HexDump.longToHex(LittleEndian.getLong(data, 0));
            throw new NotOLE2FileException(
                "Invalid header signature; read " + actualSignature + ", expected " + expectedSignature +
                " - Your file appears not to be a valid OLE2 document");
        }
    }

    // Figure out our block size: byte 30 holds the exponent (12 or 9)
    switch (_data[30]) {
        case 12:
            this.bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS;
            break;
        case 9:
            this.bigBlockSize = POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS;
            break;
        default:
            throw new IOException("Unsupported blocksize  (2^"+ _data[30] + "). Expected 2^9 or 2^12.");
    }

    // Setup the fields to read and write the counts and starts
    _bat_count = new IntegerField(_bat_count_offset, data).get();
    _property_start = new IntegerField(_property_start_offset, _data).get();
    _sbat_start = new IntegerField(_sbat_start_offset, _data).get();
    _sbat_count = new IntegerField(_sbat_block_count_offset, _data).get();
    _xbat_start = new IntegerField(_xbat_start_offset, _data).get();
    _xbat_count = new IntegerField(_xbat_count_offset, _data).get();
}

Example #12

Source File: MSPowerpointIndexerTest.java From carbon-apimgt with Apache License 2.0

4 votes

/**
 * Calls {@code MSPowerpointIndexer.getIndexedDocument} three times and checks the media
 * type field of each returned document.
 * <p>
 * Construction of {@code POIFSFileSystem(InputStream)} is stubbed with a sequence: throw
 * {@code OfficeXmlFileException}, then return a mock, then throw
 * {@code APIManagementException}. Both extractors are mocked to return empty text.
 */
@Test
public void testShouldReturnIndexedDocumentWhenParameterCorrect() throws Exception {
    POIFSFileSystem ppExtractor = Mockito.mock(POIFSFileSystem.class);
    PowerPointExtractor powerPointExtractor = Mockito.mock(PowerPointExtractor.class);
    XSLFPowerPointExtractor xslfExtractor = Mockito.mock(XSLFPowerPointExtractor.class);
    XMLSlideShow xmlSlideShow = Mockito.mock(XMLSlideShow.class);
    // Successive constructions: OOXML rejection, then the mock, then a failure.
    // Presumably each getIndexedDocument call below consumes one step - confirm.
    PowerMockito.whenNew(POIFSFileSystem.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any(InputStream.class))
            .thenThrow(OfficeXmlFileException.class)
            .thenReturn(ppExtractor)
            .thenThrow(APIManagementException.class);
    PowerMockito.whenNew(PowerPointExtractor.class).withParameterTypes(POIFSFileSystem.class)
            .withArguments(ppExtractor).thenReturn(powerPointExtractor);
    PowerMockito.whenNew(XMLSlideShow.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any())
            .thenReturn(xmlSlideShow);
    PowerMockito.whenNew(XSLFPowerPointExtractor.class).withArguments(xmlSlideShow).thenReturn(xslfExtractor);
    Mockito.when(powerPointExtractor.getText()).thenReturn("");
    Mockito.when(xslfExtractor.getText()).thenReturn("");
    MSPowerpointIndexer indexer = new MSPowerpointIndexer();

    IndexDocument ppDoc = indexer.getIndexedDocument(file2Index);

    // should return the default media type when media type is not defined in file2Index
    if (!"application/vnd.ms-powerpoint".equals(ppDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0))) {
        Assert.fail();
    }

    // should return the media type we have set in the file2Index
    file2Index.mediaType = "text/html";
    ppDoc = indexer.getIndexedDocument(file2Index);
    if (!"text/html".equals(ppDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0))) {
        Assert.fail();
    }

    // should return the media type we have set in the file2Index even if exception occurred while reading the file
    ppDoc = indexer.getIndexedDocument(file2Index);
    if (!"text/html".equals(ppDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0))) {
        Assert.fail();
    }
}

Example #13

Source File: MSWordIndexerTest.java From carbon-apimgt with Apache License 2.0

4 votes

/**
 * Calls {@code MSWordIndexer.getIndexedDocument} three times and checks the media type
 * field of each returned document.
 * <p>
 * Construction of {@code POIFSFileSystem(InputStream)} is stubbed with a sequence: throw
 * {@code OfficeXmlFileException}, then return a mock, then throw
 * {@code APIManagementException}. Both extractors are mocked to return empty text.
 */
@Test
public void testShouldReturnIndexedDocumentWhenParameterCorrect() throws Exception {
    POIFSFileSystem poiFS = Mockito.mock(POIFSFileSystem.class);
    WordExtractor wordExtractor = Mockito.mock(WordExtractor.class);
    XWPFWordExtractor xwpfExtractor = Mockito.mock(XWPFWordExtractor.class);
    XWPFDocument xwpfDocument = Mockito.mock(XWPFDocument.class);
    // Successive constructions: OOXML rejection, then the mock, then a failure.
    // Presumably each getIndexedDocument call below consumes one step - confirm.
    PowerMockito.whenNew(POIFSFileSystem.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any(InputStream.class))
            .thenThrow(OfficeXmlFileException.class)
            .thenReturn(poiFS)
            .thenThrow(APIManagementException.class);
    PowerMockito.whenNew(WordExtractor.class).withArguments(poiFS).thenReturn(wordExtractor);
    PowerMockito.whenNew(XWPFDocument.class).withParameterTypes(InputStream.class)
            .withArguments(Mockito.any())
            .thenReturn(xwpfDocument);
    PowerMockito.whenNew(XWPFWordExtractor.class).withArguments(xwpfDocument).thenReturn(xwpfExtractor);
    Mockito.when(wordExtractor.getText()).thenReturn("");
    Mockito.when(xwpfExtractor.getText()).thenReturn("");
    MSWordIndexer indexer = new MSWordIndexer();

    IndexDocument wordDoc = indexer.getIndexedDocument(file2Index);

    // should return the default media type when media type is not defined in file2Index
    // NOTE(review): the expected default is "application/pdf" although the indexer under
    // test is the Word indexer - confirm this is intended.
    if (!"application/pdf".equals(wordDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0))) {
        Assert.fail();
    }

    // should return the media type we have set in the file2Index
    file2Index.mediaType = "text/html";
    wordDoc = indexer.getIndexedDocument(file2Index);
    if (!"text/html".equals(wordDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0))) {
        Assert.fail();
    }

    // should return the media type we have set in the file2Index even if exception occurred while reading the file
    // (mediaType is assigned the same value it was given before the previous call)
    file2Index.mediaType = "text/html";
    wordDoc = indexer.getIndexedDocument(file2Index);
    if (!"text/html".equals(wordDoc.getFields().get(IndexingConstants.FIELD_MEDIA_TYPE).get(0))) {
        Assert.fail();
    }
}