org.apache.poi.poifs.filesystem.FileMagic Java Examples

The following examples show how to use org.apache.poi.poifs.filesystem.FileMagic. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SaxExcelReader.java    From myexcel with Apache License 2.0 6 votes vote down vote up
/**
 * Detects the container format of {@code file} from its magic bytes and
 * hands the file to the matching reader: OOXML goes to the xlsx reader,
 * OLE2 goes to the xls reader, and every other magic falls back to CSV.
 *
 * @param file the file to read
 * @throws SaxReadException if the magic cannot be determined or if the
 *         selected reader fails
 */
private void doRead(File file) {
    FileMagic magic;
    // The stream is only needed to sniff the header; the readers reopen the file themselves.
    try (InputStream in = FileMagic.prepareToCheckMagic(new FileInputStream(file))) {
        magic = FileMagic.valueOf(in);
    } catch (Throwable cause) {
        throw new SaxReadException("Fail to get excel magic", cause);
    }

    try {
        if (magic == FileMagic.OOXML) {
            doReadXlsx(file);
        } else if (magic == FileMagic.OLE2) {
            doReadXls(file);
        } else {
            // Anything that is neither OOXML nor OLE2 is treated as CSV.
            doReadCsv(file);
        }
    } catch (Throwable cause) {
        throw new SaxReadException("Fail to read excel", cause);
    }
}
 
Example #2
Source File: VBAMacroReader.java    From lams with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates a reader over the given stream. The stream's magic bytes decide
 * how it is opened: an OLE2 stream is wrapped in an {@code NPOIFSFileSystem},
 * anything else is passed to {@code openOOXML}.
 *
 * @param rstream the stream to read from
 * @throws IOException if the stream cannot be read
 */
public VBAMacroReader(InputStream rstream) throws IOException {
    // prepareToCheckMagic returns a stream that valueOf can safely peek into.
    InputStream is = FileMagic.prepareToCheckMagic(rstream);
    if (FileMagic.valueOf(is) != FileMagic.OLE2) {
        openOOXML(is);
    } else {
        fs = new NPOIFSFileSystem(is);
    }
}
 
Example #3
Source File: OLE2Bleach.java    From DocBleach with MIT License 5 votes vote down vote up
/**
 * Reports whether the stream looks like an OLE2 document.
 *
 * @param stream the stream to inspect
 * @return {@code true} when more than 4 bytes are available and the
 *         detected magic is {@code FileMagic.OLE2}; {@code false} otherwise,
 *         including when inspecting the stream throws
 */
@Override
public boolean handlesMagic(InputStream stream) {
  try {
    // Too little data available to inspect: not ours.
    if (stream.available() <= 4) {
      return false;
    }
    FileMagic detected = FileMagic.valueOf(stream);
    return detected == FileMagic.OLE2;
  } catch (Exception e) {
    // Detection failures are logged and reported as "not handled".
    LOGGER.warn("An exception occured", e);
    return false;
  }
}
 
Example #4
Source File: OOXMLBleach.java    From DocBleach with MIT License 5 votes vote down vote up
/**
 * Reports whether the stream looks like an OOXML document.
 *
 * @param stream the stream to inspect
 * @return {@code true} when more than 4 bytes are available and the
 *         detected magic is {@code FileMagic.OOXML}; {@code false} otherwise,
 *         including when inspecting the stream throws
 */
@Override
public boolean handlesMagic(InputStream stream) {
  try {
    // Too little data available to inspect: not ours.
    if (stream.available() <= 4) {
      return false;
    }
    FileMagic detected = FileMagic.valueOf(stream);
    return detected == FileMagic.OOXML;
  } catch (Exception e) {
    // Detection failures are logged and reported as "not handled".
    LOGGER.warn("An exception occured", e);
    return false;
  }
}
 
Example #5
Source File: XlsSchemaParser.java    From data-prep with Apache License 2.0 5 votes vote down vote up
/**
 * Parses every sheet of the spreadsheet carried by the request, choosing
 * the parser from the detected Excel format.
 *
 * @param request the schema parser request.
 * @return the list of parsed xls sheet.
 * @throws IOException if an error occurs.
 */
protected List<Schema.SheetContent> parseAllSheets(Request request) throws IOException {
    final InputStream content = request.getContent();
    // Format detection needs mark/reset, so wrap the content when it lacks that support.
    final InputStream markable = content.markSupported() ? content : FileMagic.prepareToCheckMagic(content);

    final boolean isNewFormat = XlsUtils.isNewExcelFormat(markable);

    // Both parsers receive the (possibly wrapped) stream, not the request's original one.
    final Request markableRequest = new Request(markable, request.getMetadata());
    if (isNewFormat) {
        return parseAllSheetsStream(markableRequest);
    }
    return parseAllSheetsOldFormat(markableRequest);
}
 
Example #6
Source File: HeaderBlock.java    From lams with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Builds a header block from raw header bytes. A private copy of the bytes
 * is kept, the leading signature is checked to be OLE2, the big block size
 * is chosen from byte 30, and the BAT / property / SBAT / XBAT counts and
 * start positions are read from their fixed offsets.
 *
 * @param data the raw header bytes
 * @throws IOException if the block size is neither 2^9 nor 2^12; the
 *         signature check throws one of the format-specific exceptions below
 *         when the data is not OLE2
 */
private HeaderBlock(byte[] data) throws IOException {
    this._data = data.clone();

    // Verify the signature, giving a targeted message for each known non-OLE2 format.
    final FileMagic magic = FileMagic.valueOf(data);
    switch (magic) {
        case OLE2:
            break;
        case OOXML:
            throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. "
                + "You are calling the part of POI that deals with OLE2 Office Documents. "
                + "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)");
        case XML:
            throw new NotOLE2FileException("The supplied data appears to be a raw XML file. "
                + "Formats such as Office 2003 XML are not supported");
        case MSWRITE:
            throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. "
                + "Apache POI doesn't currently support this format");
        case BIFF2:
        case BIFF3:
        case BIFF4:
            throw new OldExcelFormatException("The supplied data appears to be in " + magic + " format. "
                + "HSSF only supports the BIFF8 format, try OldExcelExtractor");
        default:
            // Generic failure: report the signature that was read next to the expected one.
            final String expectedSignature = HexDump.longToHex(_signature);
            final String actualSignature = HexDump.longToHex(LittleEndian.getLong(data, 0));
            throw new NotOLE2FileException(
                "Invalid header signature; read " + actualSignature + ", expected " + expectedSignature +
                " - Your file appears not to be a valid OLE2 document");
    }

    // Byte 30 carries the block size as a power-of-two exponent; only 9 and 12 are accepted.
    final byte blockSizeExponent = _data[30];
    if (blockSizeExponent == 12) {
        this.bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS;
    } else if (blockSizeExponent == 9) {
        this.bigBlockSize = POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS;
    } else {
        throw new IOException("Unsupported blocksize  (2^" + blockSizeExponent + "). Expected 2^9 or 2^12.");
    }

    // Read the counts and start positions from their fixed header offsets.
    _bat_count = new IntegerField(_bat_count_offset, _data).get();
    _property_start = new IntegerField(_property_start_offset, _data).get();
    _sbat_start = new IntegerField(_sbat_start_offset, _data).get();
    _sbat_count = new IntegerField(_sbat_block_count_offset, _data).get();
    _xbat_start = new IntegerField(_xbat_start_offset, _data).get();
    _xbat_count = new IntegerField(_xbat_count_offset, _data).get();
}
 
Example #7
Source File: OptionsFileConverterUtil.java    From sakai with Educational Community License v2.0 4 votes vote down vote up
/**
 * Converts an uploaded file into a list of option strings.
 *
 * <p>The format is detected from the stream's magic bytes. For OOXML and
 * OLE2 workbooks, the first cell of each row of the first sheet becomes one
 * option: string cells are HTML-escaped and skipped when blank, numeric
 * cells are rendered with {@code String.valueOf}, boolean cells become
 * {@code "1"} or {@code "0"}, and every other cell type is ignored. Any
 * other input is read as text and split into options on {@code "\r\n"};
 * blank entries are skipped and the rest are HTML-escaped.</p>
 *
 * @param in the stream to read; it is closed before this method returns
 * @return the extracted options, in file order
 * @throws IOException if the stream cannot be read or converted; the
 *         underlying failure is attached as the cause
 */
public static List<String> convertInputStreamToOptionList(InputStream in) throws IOException {
    List<String> optionsList = new ArrayList<>();
    // FileMagic needs mark/reset support, which BufferedInputStream provides.
    try (BufferedInputStream bufferedInputStream = new BufferedInputStream(in)) {
        Iterator<Row> iterator;
        switch(FileMagic.valueOf(bufferedInputStream)) {
            case OOXML:
                log.debug("Input file detected as OOXML.");
                XSSFWorkbook workbook = new XSSFWorkbook(bufferedInputStream);
                XSSFSheet datatypeSheet = workbook.getSheetAt(0);
                iterator = datatypeSheet.iterator();
                break;
            case OLE2:
                log.debug("Input file detected as OLE2.");
                HSSFWorkbook lagacyWorkbook = new HSSFWorkbook(bufferedInputStream);
                HSSFSheet legacyDatatypeSheet = lagacyWorkbook.getSheetAt(0);
                iterator = legacyDatatypeSheet.iterator();
                break;
            default:
                log.debug("Input file detected as UNKNOWN, try to open it as text and ignore if it's not ASCII text.");
                // Only CRLF separates options here; a file with bare LF line endings yields a single entry.
                try(Scanner scanner = new Scanner(bufferedInputStream).useDelimiter("\\r\\n")) {
                    while(scanner.hasNext()){
                        String inputString = HtmlUtils.htmlEscape(scanner.next(), "UTF-8");
                        if(StringUtils.isNotBlank(inputString)){
                            optionsList.add(inputString);
                        }
                    }
                } catch(Exception ex){
                    throw new IOException("Error processing the file as text type.", ex);
                }
                return optionsList;
        }

        while (iterator.hasNext()) {

            Row currentRow = iterator.next();
            Iterator<Cell> cellIterator = currentRow.iterator();
            // Only the first cell of each row contributes an option.
            if(cellIterator.hasNext()) {
                Cell currentCell = cellIterator.next();
                switch(currentCell.getCellType()) {
                    case STRING:
                        if (StringUtils.isNotBlank(currentCell.getStringCellValue())) {
                            optionsList.add(HtmlUtils.htmlEscape(currentCell.getStringCellValue(), "UTF-8"));
                        }
                        break;
                    case NUMERIC:
                         optionsList.add(String.valueOf(currentCell.getNumericCellValue()));
                         break;
                    case BOOLEAN:
                        optionsList.add(currentCell.getBooleanCellValue() ? "1" : "0");
                        break;
                    case FORMULA:
                    case BLANK:
                    case _NONE:
                    case ERROR:
                    default:
                        break;
                }
            }
        }
    } catch (Exception e) {
        // Chain the original failure so the root cause (including the text-branch error) is not lost.
        throw new IOException("Error converting the file to options list.", e);
    }

    return optionsList;
}
 
Example #8
Source File: OptionsFileConverterUtil.java    From sakai with Educational Community License v2.0 4 votes vote down vote up
/**
 * Converts an uploaded file into a list of option strings.
 *
 * <p>The format is detected from the stream's magic bytes. For OOXML and
 * OLE2 workbooks, the first cell of each row of the first sheet becomes one
 * option: string cells are HTML-escaped and skipped when blank, numeric
 * cells are rendered with {@code String.valueOf}, boolean cells become
 * {@code "1"} or {@code "0"}, and every other cell type is ignored. Any
 * other input is read as text and split into options on {@code "\r\n"};
 * blank entries are skipped and the rest are HTML-escaped.</p>
 *
 * @param in the stream to read; it is closed before this method returns
 * @return the extracted options, in file order
 * @throws IOException if the stream cannot be read or converted; the
 *         underlying failure is attached as the cause
 */
public static List<String> convertInputStreamToOptionList(InputStream in) throws IOException {
    List<String> optionsList = new ArrayList<>();
    // FileMagic needs mark/reset support, which BufferedInputStream provides.
    try (BufferedInputStream bufferedInputStream = new BufferedInputStream(in)) {
        Iterator<Row> iterator;
        switch(FileMagic.valueOf(bufferedInputStream)) {
            case OOXML:
                log.debug("Input file detected as OOXML.");
                XSSFWorkbook workbook = new XSSFWorkbook(bufferedInputStream);
                XSSFSheet datatypeSheet = workbook.getSheetAt(0);
                iterator = datatypeSheet.iterator();
                break;
            case OLE2:
                log.debug("Input file detected as OLE2.");
                HSSFWorkbook lagacyWorkbook = new HSSFWorkbook(bufferedInputStream);
                HSSFSheet legacyDatatypeSheet = lagacyWorkbook.getSheetAt(0);
                iterator = legacyDatatypeSheet.iterator();
                break;
            default:
                log.debug("Input file detected as UNKNOWN, try to open it as text and ignore if it's not ASCII text.");
                // Only CRLF separates options here; a file with bare LF line endings yields a single entry.
                try(Scanner scanner = new Scanner(bufferedInputStream).useDelimiter("\\r\\n")) {
                    while(scanner.hasNext()){
                        String inputString = HtmlUtils.htmlEscape(scanner.next(), "UTF-8");
                        if(StringUtils.isNotBlank(inputString)){
                            optionsList.add(inputString);
                        }
                    }
                } catch(Exception ex){
                    throw new IOException("Error processing the file as text type.", ex);
                }
                return optionsList;
        }

        while (iterator.hasNext()) {

            Row currentRow = iterator.next();
            Iterator<Cell> cellIterator = currentRow.iterator();
            // Only the first cell of each row contributes an option.
            if(cellIterator.hasNext()) {
                Cell currentCell = cellIterator.next();
                switch(currentCell.getCellType()) {
                    case STRING:
                        if (StringUtils.isNotBlank(currentCell.getStringCellValue())) {
                            optionsList.add(HtmlUtils.htmlEscape(currentCell.getStringCellValue(), "UTF-8"));
                        }
                        break;
                    case NUMERIC:
                         optionsList.add(String.valueOf(currentCell.getNumericCellValue()));
                         break;
                    case BOOLEAN:
                        optionsList.add(currentCell.getBooleanCellValue() ? "1" : "0");
                        break;
                    case FORMULA:
                    case BLANK:
                    case _NONE:
                    case ERROR:
                    default:
                        break;
                }
            }
        }
    } catch (Exception e) {
        // Chain the original failure so the root cause (including the text-branch error) is not lost.
        throw new IOException("Error converting the file to options list.", e);
    }

    return optionsList;
}
 
Example #9
Source File: SlideShowFactory.java    From lams with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Creates the appropriate HSLFSlideShow / XMLSlideShow from the given
 * InputStream, which may be password protected. The implementation is
 * picked from the stream's magic bytes: OLE2 or OOXML.
 *
 * <p>Note that in order to properly release resources the SlideShow
 *  should be closed after use. Note also that loading from an
 *  {@link InputStream} requires more memory than loading from a
 *  {@link File}, so prefer {@link #create(File)} where possible.</p>
 *
 *  @param inp The {@link InputStream} to read data from.
 *  @param password The password that should be used or null if no password is necessary.
 *
 *  @return The created SlideShow
 *
 *  @throws IOException if an error occurs while reading the data
 *  @throws EncryptedDocumentException If the wrong password is given for a protected file
 *  @throws IllegalArgumentException if the stream is neither OLE2 nor OOXML
 */
public static SlideShow<?,?> create(InputStream inp, String password) throws IOException, EncryptedDocumentException {
    final InputStream markable = FileMagic.prepareToCheckMagic(inp);
    final FileMagic magic = FileMagic.valueOf(markable);

    if (magic == FileMagic.OLE2) {
        // OLE2 container: open it as a POIFS file system and delegate.
        NPOIFSFileSystem poifs = new NPOIFSFileSystem(markable);
        return create(poifs, password);
    }
    if (magic == FileMagic.OOXML) {
        return createXSLFSlideShow(markable);
    }
    throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
}
 
Example #10
Source File: XlsUtils.java    From data-prep with Apache License 2.0 2 votes vote down vote up
/**
 * Tells whether the stream holds a new-format (xlsx) workbook, judging only
 * by the magic detected at the start of the stream.
 *
 * @param inputStream the xls input stream, which supports mark/reset.
 * @return true if the detected magic is OOXML (new format), otherwise false.
 * @throws IOException if the format cannot be determined.
 */
public static boolean isNewExcelFormat(InputStream inputStream) throws IOException {
    final FileMagic detected = FileMagic.valueOf(inputStream);
    return FileMagic.OOXML == detected;
}