org.apache.pdfbox.cos.COSDocument Java Examples

The following examples show how to use org.apache.pdfbox.cos.COSDocument. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FDFParser.java    From gcs with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Initializes the parser state: applies an optional EOF lookup range taken from the
 * system property named by {@code SYSPROP_EOFLOOKUPRANGE} and creates a fresh, empty
 * {@link COSDocument}.
 *
 * @throws IOException declared by the signature.
 */
private void init() throws IOException
{
    final String configuredRange = System.getProperty(SYSPROP_EOFLOOKUPRANGE);
    if (configuredRange != null)
    {
        try
        {
            final int lookupRange = Integer.parseInt(configuredRange);
            setEOFLookupRange(lookupRange);
        }
        catch (NumberFormatException nfe)
        {
            // An unparsable value is logged and otherwise ignored.
            final String message = "System property " + SYSPROP_EOFLOOKUPRANGE
                    + " does not contain an integer value, but: '" + configuredRange + "'";
            LOG.warn(message);
        }
    }
    document = new COSDocument();
}
 
Example #2
Source File: PDFParser.java    From gcs with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Initializes the parser state: applies an optional EOF lookup range taken from the
 * system property named by {@code SYSPROP_EOFLOOKUPRANGE} and creates the
 * {@link COSDocument} on top of the given scratch file.
 *
 * @param scratchFile handed unchanged to the {@link COSDocument} constructor.
 *
 * @throws IOException declared by the signature.
 */
private void init(ScratchFile scratchFile) throws IOException
{
    final String rangeProperty = System.getProperty(SYSPROP_EOFLOOKUPRANGE);
    if (rangeProperty != null)
    {
        try
        {
            final int parsedRange = Integer.parseInt(rangeProperty);
            setEOFLookupRange(parsedRange);
        }
        catch (NumberFormatException nfe)
        {
            // An unparsable value is logged and otherwise ignored.
            final String warning = "System property " + SYSPROP_EOFLOOKUPRANGE
                    + " does not contain an integer value, but: '" + rangeProperty + "'";
            LOG.warn(warning);
        }
    }
    document = new COSDocument(scratchFile);
}
 
Example #3
Source File: COSWriter.java    From gcs with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Writes the body of the document in two passes: first the root and info dictionaries
 * referenced by the trailer, then (after clearing {@code willEncrypt}) the encrypt
 * dictionary.
 *
 * @param doc The document whose trailer names the objects to write.
 *
 * @throws IOException If there is an error writing the data.
 */
protected void doWriteBody(COSDocument doc) throws IOException
{
    final COSDictionary trailerDict = doc.getTrailer();
    final COSDictionary rootDict = trailerDict.getCOSDictionary(COSName.ROOT);
    final COSDictionary infoDict = trailerDict.getCOSDictionary(COSName.INFO);
    final COSDictionary encryptDict = trailerDict.getCOSDictionary(COSName.ENCRYPT);

    // First pass: queue root and info (when present) and flush them.
    if (rootDict != null)
    {
        addObjectToWrite(rootDict);
    }
    if (infoDict != null)
    {
        addObjectToWrite(infoDict);
    }
    doWriteObjects();

    // Second pass: the encrypt dictionary is queued only after willEncrypt is cleared.
    willEncrypt = false;
    if (encryptDict != null)
    {
        addObjectToWrite(encryptDict);
    }
    doWriteObjects();
}
 
Example #4
Source File: ExtractTextHelper.java    From o2oa with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Extracts the text of all pages of a PDF held in memory.
 *
 * @param bytes the raw PDF content.
 * @return the extracted text, or {@code null} when any exception occurred (the exception
 *         is logged, not rethrown).
 */
public static String pdf(byte[] bytes) {
	try {
		PDFParser pdfParser = new PDFParser(new RandomAccessBuffer(bytes));
		pdfParser.parse();
		try (COSDocument cosDocument = pdfParser.getDocument();
				PDDocument pdDocument = new PDDocument(cosDocument)) {
			PDFTextStripper textStripper = new PDFTextStripper();
			textStripper.setStartPage(1);
			final int lastPage = pdDocument.getNumberOfPages();
			textStripper.setEndPage(lastPage);
			return textStripper.getText(pdDocument);
		}
	} catch (Exception e) {
		logger.error(e);
		return null;
	}
}
 
Example #5
Source File: PDF2TextExample.java    From tutorials with MIT License 5 votes vote down vote up
/**
 * Extracts the text of the given PDF file and writes it to {@code src/output/pdf.txt}.
 *
 * <p>All resources (the file source, both document wrappers and the output writer) are
 * released even when parsing or text extraction fails.
 *
 * @param filename path of the PDF file to read.
 * @throws IOException if the PDF cannot be read or parsed, or the output file cannot be opened.
 */
private static void generateTxtFromPDF(String filename) throws IOException {
	File f = new File(filename);
	String parsedText;

	// The source is opened here, so it is closed here; a PDDocument built from a bare
	// COSDocument is not given the source to close.
	try (RandomAccessFile source = new RandomAccessFile(f, "r")) {
		PDFParser parser = new PDFParser(source);
		parser.parse();

		// Resources are closed in reverse order: pdDoc first, then cosDoc.
		try (COSDocument cosDoc = parser.getDocument(); PDDocument pdDoc = new PDDocument(cosDoc)) {
			PDFTextStripper pdfStripper = new PDFTextStripper();
			parsedText = pdfStripper.getText(pdDoc);
		}
	}

	try (PrintWriter pw = new PrintWriter("src/output/pdf.txt")) {
		pw.print(parsedText);
	}
}
 
Example #6
Source File: PDFIndexerTest.java    From carbon-apimgt with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the media type reported by the indexer in two situations: the default value when
 * {@code file2Index} defines none, and the configured value when the second document handed
 * out by the parser throws on {@code close()}.
 */
@Test
public void testShouldReturnIndexedDocumentWhenParameterCorrect() throws IOException {
    final String mediaTypeKey = "mediaType";
    final String customMediaType = "application/pdf+test";

    PDFParser parserMock = Mockito.mock(PDFParser.class);
    COSDocument failingCosDoc = Mockito.mock(COSDocument.class);
    PDFTextStripper stripperMock = Mockito.mock(PDFTextStripper.class);
    Mockito.doThrow(IOException.class).when(failingCosDoc).close();
    // First call yields a real document, the second one the mock whose close() fails.
    Mockito.when(parserMock.getDocument()).thenReturn(new COSDocument()).thenReturn(failingCosDoc);
    Mockito.when(stripperMock.getText(new PDDocument())).thenReturn("");
    PDFIndexer pdfIndexer = new PDFIndexerWrapper(parserMock, stripperMock);

    // should return the default media type when media type is not defined in file2Index
    IndexDocument indexed = pdfIndexer.getIndexedDocument(file2Index);
    boolean defaultTypeReported = "application/pdf".equals(indexed.getFields().get(mediaTypeKey).get(0));
    if (!defaultTypeReported) {
        Assert.fail();
    }

    // should return the media type we have set in the file2Index even if error occurs in finally block
    file2Index.mediaType = customMediaType;
    indexed = pdfIndexer.getIndexedDocument(file2Index);
    boolean customTypeReported = customMediaType.equals(indexed.getFields().get(mediaTypeKey).get(0));
    if (!customTypeReported) {
        Assert.fail();
    }
}
 
Example #7
Source File: COSWriter.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Writes an FDF document: remembers it in {@code fdfDocument}, clears {@code willEncrypt}
 * and lets the underlying COS document accept this writer as a visitor.
 *
 * @param doc The FDF document to write.
 *
 * @throws IOException If an error occurs while generating the data.
 */
public void write(FDFDocument doc) throws IOException
{
    this.fdfDocument = doc;
    this.willEncrypt = false;
    this.fdfDocument.getDocument().accept(this);
}
 
Example #8
Source File: COSWriter.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Writes the trailer section: emits the trailer keyword, updates the trailer dictionary
 * (size, removal of entries that must not be carried over) and then writes the dictionary.
 *
 * @param doc The document whose trailer is written.
 *
 * @throws IOException If there is an IOError while writing the document.
 */
protected void doWriteTrailer(COSDocument doc) throws IOException
{
    getStandardOutput().write(TRAILER);
    getStandardOutput().writeEOL();

    final COSDictionary trailerDict = doc.getTrailer();

    // Sorting is only needed when the object keys were not regenerated.
    Collections.sort(getXRefEntries());
    final int lastIndex = getXRefEntries().size() - 1;
    final COSWriterXRefEntry highestEntry = getXRefEntries().get(lastIndex);
    // Size is one more than the object number of the last sorted entry.
    trailerDict.setLong(COSName.SIZE, highestEntry.getKey().getNumber() + 1);

    // PREV is kept only when an incremental update is performed.
    if (!incrementalUpdate)
    {
        trailerDict.removeItem(COSName.PREV);
    }
    if (!doc.isXRefStream())
    {
        trailerDict.removeItem(COSName.XREF_STM);
    }
    // A checksum entry, if any, is always dropped.
    trailerDict.removeItem(COSName.DOC_CHECKSUM);

    final COSArray documentId = trailerDict.getCOSArray(COSName.ID);
    if (documentId != null)
    {
        documentId.setDirect(true);
    }

    trailerDict.accept(this);
}
 
Example #9
Source File: COSWriter.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Registers the objects of an existing document for an incremental save: every
 * cross-reference key whose pooled object is non-null and not a {@code COSNumber} is
 * recorded in {@code objectKeys} and {@code keyObject}, and the highest object number seen
 * is passed to {@code setNumber}. Does nothing when {@code doc} is {@code null}; an
 * {@link IOException} is logged rather than propagated.
 *
 * @param doc the document about to be saved incrementally, may be {@code null}.
 */
private void prepareIncrement(PDDocument doc)
{
  if (doc == null)
  {
    return;
  }
  try
  {
    COSDocument cosDocument = doc.getDocument();

    Map<COSObjectKey, Long> xref = cosDocument.getXrefTable();
    long maxNumber = doc.getDocument().getHighestXRefObjectNumber();
    for (COSObjectKey key : xref.keySet())
    {
      COSBase pooled = cosDocument.getObjectFromPool(key).getObject();
      boolean trackable = pooled != null && key != null && !(pooled instanceof COSNumber);
      if (trackable)
      {
          objectKeys.put(pooled, key);
          keyObject.put(key, pooled);
      }

      if (key != null)
      {
          maxNumber = Math.max(maxNumber, key.getNumber());
      }
    }
    setNumber(maxNumber);
  }
  catch (IOException e)
  {
      LOG.error(e,e);
  }
}
 
Example #10
Source File: PDFTemplateCreator.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Serializes the given visual signature document into memory with a {@code COSWriter}
 * and returns the written bytes as a stream.
 *
 * @param visualSignature the document to serialize.
 * @return a stream over the serialized bytes.
 * @throws IOException if writing or closing the writer fails.
 */
private InputStream getVisualSignatureAsStream(COSDocument visualSignature) throws IOException
{
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    final COSWriter cosWriter = new COSWriter(buffer);
    cosWriter.write(visualSignature);
    cosWriter.close();
    final byte[] serialized = buffer.toByteArray();
    return new ByteArrayInputStream(serialized);
}
 
Example #11
Source File: FDFDocument.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Creates a new FDF document backed by a fresh {@link COSDocument} with version 1.2,
 * an empty trailer dictionary and a new catalog.
 */
public FDFDocument()
{
    final COSDocument cosDocument = new COSDocument();
    cosDocument.setVersion(1.2f);
    // The trailer has to exist before the catalog is installed.
    cosDocument.setTrailer(new COSDictionary());
    document = cosDocument;

    // The root dictionary comes last.
    setCatalog(new FDFCatalog());
}
 
Example #12
Source File: ExtractTextTools.java    From o2oa with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Reads a PDF from a byte array and returns the text of all its pages.
 *
 * @param bytes the PDF content.
 * @return the page text, or {@code null} if anything failed; failures are logged.
 */
public static String pdf(byte[] bytes) {
	try {
		PDFParser pdfParser = new PDFParser(new RandomAccessBuffer(bytes));
		pdfParser.parse();
		try (COSDocument lowLevelDoc = pdfParser.getDocument();
				PDDocument highLevelDoc = new PDDocument(lowLevelDoc)) {
			PDFTextStripper textStripper = new PDFTextStripper();
			textStripper.setStartPage(1);
			textStripper.setEndPage(highLevelDoc.getNumberOfPages());
			String pageText = textStripper.getText(highLevelDoc);
			return pageText;
		}
	} catch (Exception e) {
		logger.error(e);
	}
	return null;
}
 
Example #13
Source File: ExtractTextHelper.java    From o2oa with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Converts in-memory PDF content to plain text covering page 1 through the last page.
 *
 * @param bytes the PDF content.
 * @return the text, or {@code null} when an exception was raised (it is logged and swallowed).
 */
public static String pdf(byte[] bytes) {
	try {
		PDFParser contentParser = new PDFParser(new RandomAccessBuffer(bytes));
		contentParser.parse();
		String text;
		try (COSDocument cosDoc = contentParser.getDocument(); PDDocument pdDoc = new PDDocument(cosDoc)) {
			PDFTextStripper pageStripper = new PDFTextStripper();
			pageStripper.setStartPage(1);
			pageStripper.setEndPage(pdDoc.getNumberOfPages());
			text = pageStripper.getText(pdDoc);
		}
		// Reached only when both documents were closed without an exception.
		return text;
	} catch (Exception e) {
		logger.error(e);
		return null;
	}
}
 
Example #14
Source File: PDFXRefStream.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Creates a fresh XRef stream, as used for a new file or an incremental update.
 * The backing stream is obtained from the given document; the entry map and the
 * object number set start out empty.
 *
 * @param cosDocument the document that creates the underlying COS stream.
 */
public PDFXRefStream(COSDocument cosDocument)
{
    this.stream = cosDocument.createCOSStream();
    this.streamData = new TreeMap<Long, Object>();
    this.objectNumbers = new TreeSet<Long>();
}
 
Example #15
Source File: COSParser.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Returns the parsed document. Parsing must have happened before this call; the caller is
 * responsible for calling close() on the returned document to release resources.
 *
 * @return The document that was parsed.
 *
 * @throws IOException If no document is available yet, i.e. {@code document} is still null.
 */
public COSDocument getDocument() throws IOException
{
    if (document != null)
    {
        return document;
    }
    throw new IOException("You must parse the document first before calling getDocument()");
}
 
Example #16
Source File: PDDocument.java    From gcs with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Scans the objects of the visual signature template for the first annotation dictionary
 * and the first signature field dictionary that has an appearance dictionary, and applies
 * them to the signature field and the AcroForm.
 *
 * @param signatureField the field that receives the rectangle and appearance.
 * @param acroForm the form that receives the default resources.
 * @param visualSignature the template document to scan.
 * @throws IllegalArgumentException if either of the two required objects is not found.
 */
private void prepareVisibleSignature(PDSignatureField signatureField, PDAcroForm acroForm, 
        COSDocument visualSignature)
{
    boolean annotFound = false;
    boolean sigFieldFound = false;
    for (COSObject candidate : visualSignature.getObjects())
    {
        // Stop as soon as both required objects have been handled.
        if (annotFound && sigFieldFound)
        {
            break;
        }

        COSBase resolved = candidate.getObject();
        if (!(resolved instanceof COSDictionary))
        {
            continue;
        }
        COSDictionary dict = (COSDictionary) resolved;

        // Signature annotation: only the first match is used.
        COSBase type = dict.getDictionaryObject(COSName.TYPE);
        if (!annotFound && COSName.ANNOT.equals(type))
        {
            assignSignatureRectangle(signatureField, dict);
            annotFound = true;
        }

        // Signature field: only the first match with an appearance dictionary is used.
        COSBase fieldType = dict.getDictionaryObject(COSName.FT);
        COSBase appearance = dict.getDictionaryObject(COSName.AP);
        if (!sigFieldFound && COSName.SIG.equals(fieldType) && appearance instanceof COSDictionary)
        {
            assignAppearanceDictionary(signatureField, (COSDictionary) appearance);
            assignAcroFormDefaultResource(acroForm, dict);
            sigFieldFound = true;
        }
    }

    if (!(annotFound && sigFieldFound))
    {
        throw new IllegalArgumentException("Template is missing required objects");
    }
}
 
Example #17
Source File: COSWriter.java    From gcs with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Writes a complete document: header (or a separating line break for incremental updates),
 * body, cross-reference information, the startxref/EOF footer and, for incremental
 * updates, the increment or the signature.
 *
 * @param doc the document to write.
 * @return always {@code null}.
 * @throws IOException if writing fails.
 */
@Override
public Object visitFromDocument(COSDocument doc) throws IOException
{
    if (incrementalUpdate)
    {
        // The original file may lack a trailing newline. Writing one here keeps the
        // first new object off the line of the previous %%EOF; a redundant newline
        // is harmless. PDFBOX-1051
        getStandardOutput().writeCRLF();
    }
    else
    {
        doWriteHeader(doc);
    }

    doWriteBody(doc);

    // Offset taken from the previous trailer, -1 when there is no trailer.
    final COSDictionary previousTrailer = doc.getTrailer();
    final long hybridPrev = previousTrailer != null ? previousTrailer.getLong(COSName.XREF_STM) : -1;

    final boolean writeIncrementalXRef = incrementalUpdate || doc.isXRefStream();
    if (writeIncrementalXRef)
    {
        doWriteXRefInc(doc, hybridPrev);
    }
    else
    {
        doWriteXRefTable();
        doWriteTrailer(doc);
    }

    // Footer: startxref keyword, offset, end-of-file marker.
    getStandardOutput().write(STARTXREF);
    getStandardOutput().writeEOL();
    final String startxrefText = String.valueOf(getStartxref());
    getStandardOutput().write(startxrefText.getBytes(Charsets.ISO_8859_1));
    getStandardOutput().writeEOL();
    getStandardOutput().write(EOF);
    getStandardOutput().writeEOL();

    if (incrementalUpdate)
    {
        final boolean offsetsKnown = signatureOffset != 0 && byteRangeOffset != 0;
        if (offsetsKnown)
        {
            doWriteSignature();
        }
        else
        {
            doWriteIncrement();
        }
    }

    return null;
}
 
Example #18
Source File: COSWriter.java    From gcs with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Writes the file header: a {@code %FDF-} or {@code %PDF-} marker (FDF when
 * {@code fdfDocument} is set) followed by the document version, then a second line made of
 * the {@code COMMENT} and {@code GARBAGE} byte sequences.
 *
 * @param doc The document that supplies the version number.
 *
 * @throws IOException If there is an error writing to the stream.
 */
protected void doWriteHeader(COSDocument doc) throws IOException
{
    final String marker = (fdfDocument != null) ? "%FDF-" : "%PDF-";
    final String headerString = marker + Float.toString(doc.getVersion());
    final byte[] headerBytes = headerString.getBytes(Charsets.ISO_8859_1);

    getStandardOutput().write(headerBytes);
    getStandardOutput().writeEOL();

    getStandardOutput().write(COMMENT);
    getStandardOutput().write(GARBAGE);
    getStandardOutput().writeEOL();
}
 
Example #19
Source File: PDFTemplateStructure.java    From gcs with Mozilla Public License 2.0 3 votes vote down vote up
/**
 * Returns the visual signature COSDocument as a stream and closes the template field
 * PDDocument.
 *
 * <p>The writer used for serialization is closed before the result is built, whether or
 * not writing succeeded.
 *
 * @return the visual signature COSDocument as a stream
 * @throws IOException if writing the document, closing the writer or closing the template fails
 * @deprecated This will be removed in 2.1 because the method name is misleading and confusing,
 * and the work done rather belongs into the calling class:
 * <pre>
 * {@code
 * COSDocument visualSignature = structure.getVisualSignature();
 *  ByteArrayOutputStream baos = new ByteArrayOutputStream();
 *  COSWriter writer = new COSWriter(baos);
 *  writer.write(visualSignature);
 *  writer.close();
 *  structure.getTemplate().close();
 *  ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
 * } </pre>
 */
@Deprecated
public ByteArrayInputStream getTemplateAppearanceStream() throws IOException
{
    COSDocument visualSignature = getVisualSignature();
    ByteArrayOutputStream memoryOut = new ByteArrayOutputStream();
    COSWriter memoryWriter = new COSWriter(memoryOut);
    try
    {
        memoryWriter.write(visualSignature);
    }
    finally
    {
        // Previously the writer was never closed, unlike the documented replacement above.
        memoryWriter.close();
    }

    ByteArrayInputStream input = new ByteArrayInputStream(memoryOut.toByteArray());

    getTemplate().close();

    return input;
}
 
Example #20
Source File: PDFXrefStreamParser.java    From gcs with Mozilla Public License 2.0 3 votes vote down vote up
/**
 * Creates a parser that reads from the decoded content of the given stream and keeps the
 * stream, the document and the resolver for later use.
 *
 * @param stream The stream to parse; its input stream becomes the parser source.
 * @param document The document for the current parsing.
 * @param resolver resolver to read the xref/trailer information
 *
 * @throws IOException If there is an error initializing the stream.
 */
public PDFXrefStreamParser(COSStream stream, COSDocument document, XrefTrailerResolver resolver)
        throws IOException
{
    super(new InputStreamSource(stream.createInputStream()));
    this.xrefTrailerResolver = resolver;
    this.document = document;
    this.stream = stream;
}
 
Example #21
Source File: PDFObjectStreamParser.java    From gcs with Mozilla Public License 2.0 3 votes vote down vote up
/**
 * Creates a parser that reads from the decoded content of the given stream and keeps both
 * the stream and the document for later use.
 *
 * @param stream The stream to parse; its input stream becomes the parser source.
 * @param document The document for the current parsing.
 * @throws IOException If there is an error initializing the stream.
 */
public PDFObjectStreamParser(COSStream stream, COSDocument document) throws IOException
{
    super(new InputStreamSource(stream.createInputStream()));
    this.document = document;
    this.stream = stream;
}
 
Example #22
Source File: PDDocument.java    From gcs with Mozilla Public License 2.0 3 votes vote down vote up
/**
 * Wraps an existing document. The COSDocument that is passed in must be valid.
 *
 * @param doc The COSDocument that this document wraps.
 * @param source the source the pdf was read from, stored as {@code pdfSource}
 * @param permission the access permissions of the pdf
 */
public PDDocument(COSDocument doc, RandomAccessRead source, AccessPermission permission)
{
    this.document = doc;
    this.pdfSource = source;
    this.accessPermission = permission;
}
 
Example #23
Source File: SignatureOptions.java    From gcs with Mozilla Public License 2.0 2 votes vote down vote up
/**
 * Returns the visual signature document held by these options.
 *
 * @return the visual signature, exactly as stored in the {@code visualSignature} field
 */
public COSDocument getVisualSignature()
{
    return this.visualSignature;
}
 
Example #24
Source File: PDDocument.java    From gcs with Mozilla Public License 2.0 2 votes vote down vote up
/**
 * Constructor that uses an existing document. The COSDocument that is passed in must be valid.
 * Delegates to another constructor of this class, passing {@code null} as the second argument.
 *
 * @param doc The COSDocument that this document wraps.
 */
public PDDocument(COSDocument doc)
{
    this(doc, null);
}
 
Example #25
Source File: PDFTemplateStructure.java    From gcs with Mozilla Public License 2.0 2 votes vote down vote up
/**
 * Returns the COSDocument of the visible signature.
 *
 * @see org.apache.pdfbox.cos.COSDocument
 * @return the visual signature, exactly as stored in the {@code visualSignature} field
 */
public COSDocument getVisualSignature()
{
    return this.visualSignature;
}
 
Example #26
Source File: PDFTemplateStructure.java    From gcs with Mozilla Public License 2.0 2 votes vote down vote up
/**
 * Sets the COSDocument of the visible signature.
 *
 * @see org.apache.pdfbox.cos.COSDocument
 * @param visualSignature the document to store; it is kept by reference, not copied
 */
public void setVisualSignature(COSDocument visualSignature)
{
    this.visualSignature = visualSignature;
}
 
Example #27
Source File: PDStream.java    From gcs with Mozilla Public License 2.0 2 votes vote down vote up
/**
 * Creates a new PDStream backed by a COS stream freshly created by the given document.
 *
 * @param document The document that the stream will be part of.
 */
public PDStream(COSDocument document)
{
    this.stream = document.createCOSStream();
}
 
Example #28
Source File: FDFDocument.java    From gcs with Mozilla Public License 2.0 2 votes vote down vote up
/**
 * Returns the low level document.
 *
 * @return The COSDocument that this layer sits on top of.
 */
public COSDocument getDocument()
{
    return this.document;
}
 
Example #29
Source File: FDFDocument.java    From gcs with Mozilla Public License 2.0 2 votes vote down vote up
/**
 * Wraps an existing document. The COSDocument that is passed in must be valid; it is
 * stored as-is.
 *
 * @param doc The COSDocument that this document wraps.
 */
public FDFDocument(COSDocument doc)
{
    this.document = doc;
}
 
Example #30
Source File: COSWriter.java    From gcs with Mozilla Public License 2.0 2 votes vote down vote up
/**
 * Writes the pdf document by wrapping it in a new {@code PDDocument} and delegating to
 * the {@code PDDocument} overload of {@code write}.
 *
 * @param doc The document to write.
 * @throws IOException If an error occurs while generating the data.
 */
public void write(COSDocument doc) throws IOException
{
    write(new PDDocument(doc));
}