Java Code Examples for org.apache.poi.util.IOUtils#readFully()

The following examples show how to use org.apache.poi.util.IOUtils#readFully(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: TikaOfficeDetectParser.java    From alfresco-repository with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Peeks at the first four bytes of the stream and hands the (rewound)
 * stream to the OOXML parser when they equal the first four bytes of
 * {@code POIFSConstants.OOXML_FILE_HEADER}, or to the OLE2 parser otherwise.
 *
 * The peeked bytes are made available again to the delegate parser, either
 * via mark/reset or via a {@link PushbackInputStream}.
 *
 * @param stream the document to sniff and parse
 * @param handler passed through unchanged to the delegate parser
 * @param metadata passed through unchanged to the delegate parser
 * @param parseContext passed through unchanged to the delegate parser
 * @throws IOException if reading or rewinding the stream fails
 * @throws SAXException propagated from the delegate parser
 * @throws TikaException propagated from the delegate parser
 */
public void parse(InputStream stream,
      ContentHandler handler, Metadata metadata,
      ParseContext parseContext) throws IOException, SAXException,
      TikaException 
{
   byte[] initial4 = new byte[4];
   InputStream wrapped;
   // Preserve TikaInputStreams as TikaInputStreams as they require less memory to process
   if (stream.markSupported())
   {
      stream.mark(initial4.length);
      IOUtils.readFully(stream, initial4);
      stream.reset();
      wrapped = stream;
   }
   else
   {
      PushbackInputStream inp = new PushbackInputStream(stream, initial4.length);
      int read = IOUtils.readFully(inp, initial4);
      // Push back only what was really read. On a stream shorter than four
      // bytes (or an immediate EOF, reported as -1) unreading the whole
      // buffer would inject zero bytes that were never in the input.
      if (read > 0)
      {
         inp.unread(initial4, 0, read);
      }
      wrapped = inp;
   }
   
   // Which is it? Bytes that could not be read stay zero and so never
   //  match the OOXML signature, sending short streams to the OLE2 parser.
   if(initial4[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
      initial4[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
      initial4[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
      initial4[3] == POIFSConstants.OOXML_FILE_HEADER[3])
   {
      ooxmlParser.parse(wrapped, handler, metadata, parseContext);
   }
   else
   {
      ole2Parser.parse(wrapped, handler, metadata, parseContext);
   }
}
 
Example 2
Source File: HeaderBlock.java    From lams with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Builds a HeaderBlock from the start of an InputStream.
 *
 * The first 512 bytes are parsed as the header. When the resulting big
 * block size is not 512, the remainder of that first block is then read
 * from the stream and discarded.
 *
 * @param stream the source InputStream
 *
 * @exception IOException on errors or bad data
 */
public HeaderBlock(InputStream stream) throws IOException {
	// The header itself lives in the first 512 bytes; parse those first.
	// (For 4096 sized blocks, the remaining 3584 bytes are zero)
	this(readFirst512(stream));

	// Consume whatever is left of the first block for larger block sizes
	final int blockSize = bigBlockSize.getBigBlockSize();
	if (blockSize != 512) {
		final byte[] padding = new byte[blockSize - 512];
		IOUtils.readFully(stream, padding);
	}
}
 
Example 3
Source File: HeaderBlock.java    From lams with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Reads exactly 512 bytes from the stream.
 * (For 4096 sized blocks, the remaining 3584 bytes are zero.)
 *
 * @param stream the stream to read from
 * @return a new 512 byte array filled from the stream
 * @throws IOException if reading fails, or (via alertShortRead) if
 *         anything other than 512 bytes could be read
 */
private static byte[] readFirst512(InputStream stream) throws IOException {
     final int headerSize = 512;
     final byte[] header = new byte[headerSize];

     final int bytesRead = IOUtils.readFully(stream, header);
     if (bytesRead == headerSize) {
        return header;
     }

     // Short read (or immediate EOF): report what we got versus what we wanted
     throw alertShortRead(bytesRead, headerSize);
}
 
Example 4
Source File: DocumentBlock.java    From lams with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates a block and fills its data array from the given stream.
 *
 * The number of bytes actually read is remembered; an immediate EOF
 * (reported by the read as -1) is recorded as zero bytes.
 *
 * @param stream the InputStream delivering the data.
 * @param bigBlockSize handed to the size-only constructor
 *
 * @exception IOException if reading from the stream fails
 */

public DocumentBlock(final InputStream stream, POIFSBigBlockSize bigBlockSize)
    throws IOException
{
    this(bigBlockSize);

    final int bytesRead = IOUtils.readFully(stream, _data);
    if (bytesRead == -1) {
        // Nothing at all was available
        _bytes_read = 0;
    } else {
        _bytes_read = bytesRead;
    }
}
 
Example 5
Source File: RawDataBlock.java    From lams with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Constructor RawDataBlock
 *
 * @param stream the InputStream from which the data will be read
 * @param blockSize the size of the POIFS blocks, normally 512 bytes
 * {@link org.apache.poi.poifs.common.POIFSConstants#SMALLER_BIG_BLOCK_SIZE}
 *
 * @exception IOException on I/O errors, and if an insufficient
 *            amount of data is read (the InputStream must
 *            be an exact multiple of the block size)
 */
public RawDataBlock(final InputStream stream, int blockSize)
		throws IOException {
    _data = new byte[ blockSize ];
    int count = IOUtils.readFully(stream, _data);
    _hasData = (count > 0);

    if (count == -1) {
        _eof = true;
    }
    else if (count != blockSize) {
    	// IOUtils.readFully will always read the
    	//  requested number of bytes, unless it hits
    	//  an EOF
        _eof = true;
        String type = " byte" + ((count == 1) ? ("")
                                              : ("s"));

        log.log(POILogger.ERROR,
        		"Unable to read entire block; " + count
                 + type + " read before EOF; expected "
                 + blockSize + " bytes. Your document "
                 + "was either written by software that "
                 + "ignores the spec, or has been truncated!"
        );
    }
    else {
        _eof = false;
    }
}
 
Example 6
Source File: FileBackedDataSource.java    From lams with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns a buffer holding {@code length} bytes starting at
 * {@code position}, rewound to position 0.
 *
 * In writable mode the region is memory mapped read/write and the mapping
 * is remembered in {@code buffersToClean}; otherwise the bytes are read
 * from the channel into a freshly allocated heap buffer.
 *
 * @param length the number of bytes to map or allocate
 * @param position the offset in the file to start at
 * @return the buffer, positioned at 0
 * @throws IOException if mapping, seeking or reading fails
 * @throws IndexOutOfBoundsException if {@code position} is at or beyond
 *         {@code size()}, or if the read hits EOF before any data
 */
@Override
public ByteBuffer read(int length, long position) throws IOException {
   if(position >= size()) {
      throw new IndexOutOfBoundsException("Position " + position + " past the end of the file");
   }

   // TODO Could the read-only case use MapMode.PRIVATE instead?
   // See https://docs.oracle.com/javase/7/docs/api/java/nio/channels/FileChannel.MapMode.html#PRIVATE
   // Or should there be 3 modes rather than the current boolean -
   //  read-write, read-only, read-to-write-elsewhere?

   final ByteBuffer result;
   if (!writable) {
       // Not mapping: seek, then pull the data into a heap buffer
       channel.position(position);
       result = ByteBuffer.allocate(length);

       // -1 means EOF was hit before anything could be read
       final int bytesRead = IOUtils.readFully(channel, result);
       if (bytesRead == -1) {
           throw new IndexOutOfBoundsException("Position " + position + " past the end of the file");
       }
   } else {
       // Map the region directly for read/write access
       result = channel.map(FileChannel.MapMode.READ_WRITE, position, length);

       // Keep track of the mapping so it can be cleaned up later
       buffersToClean.add(result);
   }

   // Rewind so the caller reads from the start of the buffer
   result.position(0);
   return result;
}
 
Example 7
Source File: MimetypeMapContentTest.java    From alfresco-repository with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Checks mimetype guessing from a filename plus file contents: matching
 * and mismatching extensions, non-canonical mimetypes, Alfresco specific
 * types, a truncated OLE2 file, and a Lotus Notes EML file.
 *
 * @throws Exception if a test file cannot be opened, read or written
 */
public void testGuessMimetypeForFile() throws Exception
{
    // Correct ones
    assertEquals(
            "application/msword", 
            mimetypeService.guessMimetype("something.doc", openQuickTestFile("quick.doc"))
    );
    assertEquals(
            "application/msword", 
            mimetypeService.guessMimetype("SOMETHING.DOC", openQuickTestFile("quick.doc"))
    );
    
    // Incorrect ones, Tika spots the mistake
    assertEquals(
            "application/msword", 
            mimetypeService.guessMimetype("something.pdf", openQuickTestFile("quick.doc"))
    );
    assertEquals(
            "application/pdf", 
            mimetypeService.guessMimetype("something.doc", openQuickTestFile("quick.pdf"))
    );
    
    // Ones where we use a different mimetype to the canonical one
    assertEquals(
            "image/bmp", // Officially image/x-ms-bmp 
            mimetypeService.guessMimetype("image.bmp", openQuickTestFile("quick.bmp"))
    );
    
    // Ones where we know about the parent, and Tika knows about the details
    assertEquals(
          "application/dita+xml", // Full version:  application/dita+xml;format=concept
          mimetypeService.guessMimetype("concept.dita", openQuickTestFile("quickConcept.dita"))
    );
    
    // Alfresco Specific ones, that Tika doesn't know about
    assertEquals(
          "application/acp", 
          mimetypeService.guessMimetype("something.acp", openQuickTestFile("quick.acp"))
    );

    
    // Where the file is corrupted
    File tmp = File.createTempFile("alfresco", ".tmp");
    // Don't leave the scratch file behind once the test JVM finishes
    tmp.deleteOnExit();
    ContentReader reader = openQuickTestFile("quick.doc");
    byte[] trunc = new byte[512+256];
    InputStream inp = reader.getContentInputStream();
    try
    {
        IOUtils.readFully(inp, trunc);
    }
    finally
    {
        // Release the source stream even if the read fails
        inp.close();
    }
    FileOutputStream out = new FileOutputStream(tmp);
    try
    {
        out.write(trunc);
    }
    finally
    {
        // Release the file handle even if the write fails
        out.close();
    }
    ContentReader truncReader = new FileContentReader(tmp);
    
    // Because the file is truncated, Tika won't be able to process the contents
    //  of the OLE2 structure
    // So, it'll fall back to just OLE2, but it won't fail
    assertEquals(
            "application/x-tika-msoffice", 
            mimetypeService.guessMimetype(null, truncReader)
    );
    // But with the filename it'll be able to use the .doc extension
    //  to guess at it being a .Doc file
    assertEquals(
          "application/msword", 
          mimetypeService.guessMimetype("something.doc", truncReader)
    );
    
    // Lotus notes EML files (ALF-16381 / TIKA-1042)
    assertEquals(
          "message/rfc822", 
          mimetypeService.guessMimetype("something.eml", openQuickTestFile("quickLotus.eml"))
    );
}
 
Example 8
Source File: NPOIFSFileSystem.java    From lams with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Creates a POIFSFileSystem by buffering an {@code InputStream} into memory.
 * Normally the stream is read until EOF, and it is always closed, whether
 * or not reading succeeds.<p>
 *
 * Some streams remain usable after reaching EOF (typically those that
 * return {@code true} from {@code markSupported()}). In the unlikely case
 * that the caller has such a stream <i>and</i> needs it after this
 * constructor completes, wrap it so that the {@code close()} call is
 * trapped. {@code createNonClosingInputStream()} is provided for this:
 * <pre>
 * InputStream wrappedStream = POIFSFileSystem.createNonClosingInputStream(is);
 * HSSFWorkbook wb = new HSSFWorkbook(wrappedStream);
 * is.reset();
 * doSomethingElse(is);
 * </pre>
 * {@code ByteArrayInputStream} is a special case, because its
 * {@code close()} method does nothing:
 * <pre>
 * ByteArrayInputStream bais = ...
 * HSSFWorkbook wb = new HSSFWorkbook(bais); // calls bais.close() !
 * bais.reset(); // no problem
 * doSomethingElse(bais);
 * </pre>
 *
 * @param stream the InputStream from which to read the data
 *
 * @exception IOException on errors reading, or on invalid data
 * @exception IllegalArgumentException if the header implies a maximum
 *            file size larger than {@code Integer.MAX_VALUE} bytes
 */

public NPOIFSFileSystem(InputStream stream)
    throws IOException
{
    this(false);

    ReadableByteChannel source = null;
    boolean completed = false;

    try {
       // Work with the InputStream through NIO
       source = Channels.newChannel(stream);

       // Read and parse the header block first
       ByteBuffer header = ByteBuffer.allocate(POIFSConstants.SMALLER_BIG_BLOCK_SIZE);
       IOUtils.readFully(source, header);
       _header = new HeaderBlock(header);

       // Reject implausible block counts before allocating anything
       BlockAllocationTableReader.sanityCheckBlockCount(_header.getBATCount());

       // An InputStream has to be buffered completely in memory.
       // The largest possible size is when every BAT block entry is in use
       long upperBound = BATBlock.calculateMaximumSize(_header);
       if (upperBound > Integer.MAX_VALUE) {
           throw new IllegalArgumentException("Unable read a >2gb file via an InputStream");
       }
       ByteBuffer contents = ByteBuffer.allocate((int)upperBound);

       // The header goes at the front of the in-memory copy
       header.position(0);
       contents.put(header);
       contents.position(header.capacity());

       // Everything else in the stream follows it
       IOUtils.readFully(source, contents);
       completed = true;

       // Expose what was read as a DataSource
       _data = new ByteArrayBackedDataSource(contents.array(), contents.position());
    } finally {
       // The constructor contract says the stream is always closed
       if (source != null) {
          source.close();
       }
       closeInputStream(stream, completed);
    }

    // Finally, process the various entries
    readCoreContents();
}
 
Example 9
Source File: VBAMacroReader.java    From lams with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Reads {@code unicodeNameRecordLength} bytes from the stream and decodes
 * them as a UTF-16LE string.
 *
 * @param in the decompressing stream to read from
 * @param unicodeNameRecordLength the size, in bytes, of the buffer to fill
 * @return the decoded string
 * @throws IOException if reading from the stream fails
 */
private String readUnicodeString(RLEDecompressingInputStream in, int unicodeNameRecordLength) throws IOException {
    final byte[] encoded = new byte[unicodeNameRecordLength];
    IOUtils.readFully(in, encoded);

    final String decoded = new String(encoded, UTF_16LE);
    return decoded;
}