Java Code Examples for org.apache.pdfbox.pdfparser.PDFParser#parse()

The following examples show how to use org.apache.pdfbox.pdfparser.PDFParser#parse(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: PdfBleachSession.java    From DocBleach with MIT License 6 votes vote down vote up
/**
 * Tries to parse the document in {@code source} using the given password.
 *
 * <p>Whatever the outcome, {@code source} is repositioned to its start before this method
 * returns, so the same source can be handed to another parser.
 *
 * @param inFile scratch file handed to the parser
 * @param source the document to parse
 * @param password the password to test
 * @return the parsed document, or {@code null} if the parser rejected the password
 * @throws IOException if parsing fails for any other reason, or if repositioning the source fails
 */
@SuppressFBWarnings(
    value = "EXS_EXCEPTION_SOFTENING_RETURN_FALSE",
    justification = "This method is an helper to check the password")
private PDDocument testPassword(ScratchFile inFile, RandomAccessRead source, String password)
    throws IOException {
  PDFParser parser = new PDFParser(source, password, inFile);
  try {
    parser.parse();
    return parser.getPDDocument();
  } catch (InvalidPasswordException e) {
    LOGGER.error("The tested password is invalid");
    return null;
  } finally {
    // Absolute seek back to the start. Rewinding by "(int) getPosition()" narrows a long
    // offset to int and lands on the wrong position once the offset exceeds
    // Integer.MAX_VALUE.
    source.seek(0);
  }
}
 
Example 2
Source File: PDDocument.java    From gcs with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Parses a PDF from the given random-access file source.
 *
 * If parsing fails with an {@link IOException}, the scratch file created here is closed
 * quietly and the exception is rethrown unchanged.
 *
 * @param raFile random-access source that contains the document
 * @param password password passed to the parser for decryption
 * @param keyStore key store passed to the parser for decryption
 * @param alias alias passed to the parser for decryption
 * @param memUsageSetting memory settings used to create the scratch file
 *
 * @return loaded document
 *
 * @throws IOException In case of a reading or parsing error.
 */
private static PDDocument load(RandomAccessBufferedFileInputStream raFile, String password,
                               InputStream keyStore, String alias,
                               MemoryUsageSetting memUsageSetting) throws IOException
{
    final ScratchFile scratch = new ScratchFile(memUsageSetting);
    try
    {
        final PDFParser pdfParser =
                new PDFParser(raFile, password, keyStore, alias, scratch);
        pdfParser.parse();
        final PDDocument document = pdfParser.getPDDocument();
        return document;
    }
    catch (IOException failure)
    {
        // Only the failure path releases the scratch file here; on success it has been
        // handed to the parser.
        IOUtils.closeQuietly(scratch);
        throw failure;
    }
}
 
Example 3
Source File: PDDocument.java    From gcs with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Parses a PDF read from an input stream. The stream content is first copied into a buffer
 * created by the scratch file (configured through the memory settings parameter) so that the
 * parser gets random access to the pdf.
 *
 * @param input stream that contains the document. The caller remains responsible for closing it
 *              after loading.
 * @param password password to be used for decryption
 * @param keyStore key store to be used for decryption when using public key security
 * @param alias alias to be used for decryption when using public key security
 * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams
 *
 * @return loaded document
 *
 * @throws InvalidPasswordException If the password is incorrect.
 * @throws IOException In case of a reading or parsing error.
 */
public static PDDocument load(InputStream input, String password, InputStream keyStore, 
                              String alias, MemoryUsageSetting memUsageSetting) throws IOException
{
    final ScratchFile scratch = new ScratchFile(memUsageSetting);
    try
    {
        final RandomAccessRead buffered = scratch.createBuffer(input);
        final PDFParser pdfParser =
                new PDFParser(buffered, password, keyStore, alias, scratch);
        pdfParser.parse();
        final PDDocument document = pdfParser.getPDDocument();
        return document;
    }
    catch (IOException failure)
    {
        // Buffering or parsing failed: release the scratch file, then report the
        // original error to the caller.
        IOUtils.closeQuietly(scratch);
        throw failure;
    }
}
 
Example 4
Source File: PDFReader.java    From swcv with MIT License 6 votes vote down vote up
/**
 * Fetches the PDF at the given URL, extracts its text and stores it in {@code text}.
 *
 * The connection's input stream and the parsed document are always closed, even when
 * parsing or text extraction fails.
 *
 * @param url location of the PDF to fetch
 * @return {@code true} if the text was extracted; {@code false} if any exception occurred
 *         (its stack trace is printed)
 */
private boolean getFile(String url)
{
    try
    {
        URL u = new URL(url);
        URLConnection con = u.openConnection();
        InputStream in = con.getInputStream();
        try
        {
            PDFParser p = new PDFParser(in);
            p.parse();
            PDDocument pdoc = new PDDocument(p.getDocument());
            try
            {
                PDFTextStripper pts = new PDFTextStripper();
                text = pts.getText(pdoc);
            }
            finally
            {
                // Release the document even if text extraction threw.
                pdoc.close();
            }
        }
        finally
        {
            // The network stream was previously never closed.
            in.close();
        }

        return true;
    }
    catch (Exception e)
    {
        e.printStackTrace();
        return false;
    }
}
 
Example 5
Source File: ExtractTextTools.java    From o2oa with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Extracts the text of a PDF supplied as a byte array.
 *
 * @param bytes content of the PDF
 * @return the extracted text, or {@code null} if any exception occurred (the exception is
 *         logged)
 */
public static String pdf(byte[] bytes) {
	try {
		RandomAccessBuffer buffer = new RandomAccessBuffer(bytes);
		PDFParser pdfParser = new PDFParser(buffer);
		pdfParser.parse();
		try (COSDocument cosDocument = pdfParser.getDocument();
				PDDocument document = new PDDocument(cosDocument)) {
			PDFTextStripper textStripper = new PDFTextStripper();
			// Cover every page, from the first to the last.
			textStripper.setStartPage(1);
			final int lastPage = document.getNumberOfPages();
			textStripper.setEndPage(lastPage);
			return textStripper.getText(document);
		}
	} catch (Exception e) {
		logger.error(e);
		return null;
	}
}
 
Example 6
Source File: ExtractTextHelper.java    From o2oa with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Reads a PDF from memory and returns its text content.
 *
 * @param bytes the raw PDF data
 * @return text of all pages, or {@code null} when parsing or extraction threw (the
 *         exception is passed to the logger)
 */
public static String pdf(byte[] bytes) {
	try {
		final PDFParser pdfParser = new PDFParser(new RandomAccessBuffer(bytes));
		pdfParser.parse();
		// Both documents are closed when the block exits, before the result is returned.
		try (COSDocument lowLevelDoc = pdfParser.getDocument();
				PDDocument pdfDoc = new PDDocument(lowLevelDoc)) {
			final PDFTextStripper extractor = new PDFTextStripper();
			extractor.setStartPage(1);
			extractor.setEndPage(pdfDoc.getNumberOfPages());
			final String extracted = extractor.getText(pdfDoc);
			return extracted;
		}
	} catch (Exception e) {
		logger.error(e);
	}
	return null;
}
 
Example 7
Source File: ExtractTextHelper.java    From o2oa with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Converts the given PDF bytes to plain text.
 *
 * @param bytes PDF file content
 * @return the text from page 1 through the last page; {@code null} if an exception was
 *         caught (it is logged, not rethrown)
 */
public static String pdf(byte[] bytes) {
	try {
		RandomAccessBuffer pdfBytes = new RandomAccessBuffer(bytes);
		PDFParser reader = new PDFParser(pdfBytes);
		reader.parse();
		try (COSDocument parsed = reader.getDocument(); PDDocument doc = new PDDocument(parsed)) {
			PDFTextStripper textExtractor = new PDFTextStripper();
			textExtractor.setStartPage(1);
			int pageCount = doc.getNumberOfPages();
			textExtractor.setEndPage(pageCount);
			return textExtractor.getText(doc);
		}
	} catch (Exception e) {
		// Failures are reported through the logger; the caller only sees null.
		logger.error(e);
		return null;
	}
}
 
Example 8
Source File: SignatureOptions.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Parses the given source and stores the resulting document in {@code visualSignature}.
 * The source itself is kept in {@code pdfSource}.
 *
 * @param rar source to parse
 * @throws IOException if parsing fails
 */
private void initFromRandomAccessRead(RandomAccessRead rar) throws IOException
{
    // Record the source before parsing; it stays assigned even if parsing throws.
    pdfSource = rar;
    final PDFParser visualSignatureParser = new PDFParser(pdfSource);
    visualSignatureParser.parse();
    visualSignature = visualSignatureParser.getDocument();
}
 
Example 9
Source File: ShrinkPDF.java    From shrink-pdf with MIT License 5 votes vote down vote up
/**
 * Shrink the PDF referenced by the {@code input} field.
 *
 * <p>The method takes no arguments; it works from instance state: {@code input} is the
 * PDF to read, {@code compQual} is the compression quality (a negative value is replaced
 * by {@code compQualDefault}), and {@code tiff} selects which image writer is used.
 * Every page's resources are passed to {@code scanResources}.
 *
 * @return The processed {@code PDDocument}
 * @throws FileNotFoundException if the input cannot be opened
 * @throws IOException if the PDF cannot be read or parsed
 */
private PDDocument shrinkMe() 
        throws FileNotFoundException, IOException {
    if (compQual < 0) {
        compQual = compQualDefault;
    }
    final RandomAccessBufferedFileInputStream rabfis =
            new RandomAccessBufferedFileInputStream(input);
    final PDDocument doc;
    try {
        final PDFParser parser = new PDFParser(rabfis);
        parser.parse();
        doc = parser.getPDDocument();
    } catch (IOException e) {
        // Parsing failed, so no document exists that could close the file handle later.
        try {
            rabfis.close();
        } catch (IOException ignored) {
            // The parse failure is the error worth reporting; a close failure is secondary.
        }
        throw e;
    }
    final PDPageTree pages = doc.getPages();
    final ImageWriter imgWriter;
    final ImageWriteParam iwp;
    if (tiff) {
        // NOTE(review): the flag is called "tiff" but a writer for the "png" suffix is
        // looked up. Behavior is kept as is; confirm whether this is intended.
        final Iterator<ImageWriter> tiffWriters =
                ImageIO.getImageWritersBySuffix("png");
        imgWriter = tiffWriters.next();
        iwp = imgWriter.getDefaultWriteParam();
    } else {
        final Iterator<ImageWriter> jpgWriters =
                ImageIO.getImageWritersByFormatName("jpeg");
        imgWriter = jpgWriters.next();
        iwp = imgWriter.getDefaultWriteParam();
        iwp.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
        iwp.setCompressionQuality(compQual);
    }
    for (PDPage p : pages) {
        scanResources(p.getResources(), doc, imgWriter, iwp);
    }
    return doc;
}
 
Example 10
Source File: PDF2TextExample.java    From tutorials with MIT License 5 votes vote down vote up
/**
 * Extracts the text of the PDF at {@code filename} and writes it to
 * {@code src/output/pdf.txt}.
 *
 * <p>The source file, the parsed document and the output writer are all closed before
 * this method returns, whether it succeeds or fails.
 *
 * @param filename path of the PDF to read
 * @throws IOException if the PDF cannot be read or parsed, or the output file cannot be opened
 */
private static void generateTxtFromPDF(String filename) throws IOException {
	File f = new File(filename);
	String parsedText;
	RandomAccessFile source = new RandomAccessFile(f, "r");
	try {
		PDFParser parser = new PDFParser(source);
		parser.parse();

		// Closing the PDDocument also closes the COSDocument it wraps, so a single
		// close covers both and no separate cosDoc.close() is needed.
		try (PDDocument pdDoc = new PDDocument(parser.getDocument())) {
			PDFTextStripper pdfStripper = new PDFTextStripper();
			parsedText = pdfStripper.getText(pdDoc);
		}
	} finally {
		// The source was previously never closed, leaking the file handle.
		source.close();
	}

	try (PrintWriter pw = new PrintWriter("src/output/pdf.txt")) {
		pw.print(parsedText);
	}
}
 
Example 11
Source File: PDDocument.java    From gcs with Mozilla Public License 2.0 3 votes vote down vote up
/**
 * Parses a PDF.
 *
 * If parsing fails with an {@link IOException}, the scratch file created for the parse is
 * closed quietly before the exception is rethrown, matching the other {@code load}
 * overloads.
 * 
 * @param input byte array that contains the document.
 * @param password password to be used for decryption
 * @param keyStore key store to be used for decryption when using public key security 
 * @param alias alias to be used for decryption when using public key security
 * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams 
 * 
 * @return loaded document
 * 
 * @throws InvalidPasswordException If the password is incorrect.
 * @throws IOException In case of a reading or parsing error.
 */
public static PDDocument load(byte[] input, String password, InputStream keyStore, 
        String alias, MemoryUsageSetting memUsageSetting) throws IOException
{
    ScratchFile scratchFile = new ScratchFile(memUsageSetting);
    try
    {
        RandomAccessRead source = new RandomAccessBuffer(input);
        PDFParser parser = new PDFParser(source, password, keyStore, alias, scratchFile);
        parser.parse();
        return parser.getPDDocument();
    }
    catch (IOException ioe)
    {
        // Without this the scratch file leaks whenever parsing fails.
        IOUtils.closeQuietly(scratchFile);
        throw ioe;
    }
}