Java Code Examples for org.apache.pdfbox.pdmodel.PDDocument#load()

The following examples show how to use org.apache.pdfbox.pdmodel.PDDocument#load(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PdfIntegrationTests.java    From java-wkhtmltopdf-wrapper with MIT License 6 votes vote down vote up
/**
 * Generates a PDF from a URL and checks that the text extracted from the resulting
 * bytes contains the expected word. An Xvfb configuration is applied only when the
 * {@code os.name} system property does not contain "windows".
 *
 * @throws Exception if PDF generation or parsing fails
 */
@Test
public void testPdfWithXvfb() throws Exception {
    WrapperConfig wc = null;
    if (!System.getProperty("os.name").toLowerCase().contains("windows")) {
        XvfbConfig xc = new XvfbConfig();
        xc.addParams(new Param("--auto-servernum"), new Param("--server-num=1"));

        wc = new WrapperConfig();
        wc.setXvfbConfig(xc);
    }
    Pdf pdf = wc != null ? new Pdf(wc) : new Pdf();
    pdf.addPageFromUrl("http://www.google.com");

    pdf.saveAs("output.pdf");

    // WHEN
    byte[] pdfBytes = pdf.getPDF();
    String pdfText;
    // try-with-resources: the parsed document is closed even when text extraction throws
    try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(pdfBytes))) {
        pdfText = new PDFTextStripper().getText(pdDocument);
    }

    Assert.assertThat("document should be generated", pdfText, containsString("Google"));
}
 
Example 2
Source File: PAdESLevelBCertificationTest.java    From dss with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Checks that the signed PDF exposes a DocMDP entry inside the Perms dictionary of
 * its document catalog. Any exception raised while reading the PDF fails the test.
 *
 * @param byteArray content of the signed document
 */
@Override
protected void onDocumentSigned(byte[] byteArray) {
	super.onDocumentSigned(byteArray);

	try (PDDocument signedPdf = PDDocument.load(byteArray)) {
		COSBase permissions = signedPdf.getDocumentCatalog().getCOSObject().getDictionaryObject(COSName.PERMS);
		// DocMDP can only be looked up when Perms is actually a dictionary
		COSBase docMdpEntry = (permissions instanceof COSDictionary)
				? ((COSDictionary) permissions).getDictionaryObject(COSName.DOCMDP)
				: null;
		assertNotNull(docMdpEntry);
	} catch (Exception ex) {
		fail(ex.getMessage());
	}
}
 
Example 3
Source File: PdfBoxSignatureService.java    From dss with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Produces the signed PDF: verifies the document permissions, loads the document
 * (with the configured password), applies the given signature value and returns the
 * result as an in-memory document with the PDF MIME type.
 *
 * @param toSignDocument the document to sign
 * @param signatureValue the signature value to embed
 * @param parameters     the signature parameters
 * @return the signed document
 * @throws DSSException wrapping any {@link IOException} raised while processing
 */
@Override
public DSSDocument sign(final DSSDocument toSignDocument, final byte[] signatureValue,
		final PAdESCommonParameters parameters) {

	checkDocumentPermissions(toSignDocument, parameters.getPasswordProtection());

	try (ByteArrayOutputStream signedOutput = new ByteArrayOutputStream();
			InputStream source = toSignDocument.openStream();
			PDDocument loadedPdf = PDDocument.load(source, parameters.getPasswordProtection())) {

		signDocumentAndReturnDigest(parameters, signatureValue, signedOutput, loadedPdf);

		final byte[] signedBytes = signedOutput.toByteArray();
		final DSSDocument signedDocument = new InMemoryDocument(signedBytes);
		signedDocument.setMimeType(MimeType.PDF);
		return signedDocument;
	} catch (IOException ioException) {
		throw new DSSException(ioException);
	}
}
 
Example 4
Source File: WaterMarkConverter.java    From workable-converter with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Adds a watermark overlay to the PDF read from the request's source stream and
 * writes the result to the request's destination stream.
 *
 * @param require conversion request carrying the streams and the watermark settings
 * @return {@code true} when the watermarked PDF has been written
 * @throws ConvertFailedException if reading, overlaying or saving the PDF fails
 */
@Override
public boolean byStream(ConvertRequire require) throws ConvertFailedException {
    // try-with-resources: the source document is closed on success and on failure
    try (PDDocument pdfFile = PDDocument.load(require.getSrcStream())) {
        HashMap<Integer, String> overlayGuide = new HashMap<>();

        String tmpName = this.getTmpName(require.getWaterMarkRequire());
        int waterMarkPage = require.getWaterMarkRequire().getWaterMarkPage();
        //0 means add watermark in all page
        if (waterMarkPage == 0) {
            // overlay page numbers are 1-based
            for (int i = 1; i <= pdfFile.getNumberOfPages(); i++) {
                overlayGuide.put(i, tmpName);
            }
        } else {
            overlayGuide.put(waterMarkPage, tmpName);
        }
        // NOTE(review): depending on the PDFBox version, Overlay may itself need closing — confirm.
        Overlay overlay = new Overlay();
        overlay.setInputPDF(pdfFile);
        overlay.setOverlayPosition(Overlay.Position.BACKGROUND);
        overlay.overlay(overlayGuide);
        pdfFile.save(require.getDestStream());
    } catch (IOException e) {
        // NOTE(review): the cause is dropped here; chain it if ConvertFailedException supports a cause.
        throw new ConvertFailedException(e.getMessage());
    }
    return true;
}
 
Example 5
Source File: TextToPdfContentTransformerTest.java    From alfresco-repository with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Transforms the given text to PDF, reads the PDF back and checks the extracted text.
 *
 * @param text      the text to transform
 * @param encoding  name of the charset used to build the content reader
 * @param checkText the text expected after the round trip
 * @throws IOException if the temporary PDF cannot be read
 */
private void transformTextAndCheck(String text, String encoding, String checkText)
        throws IOException
{
    // Get a reader for the text
    ContentReader reader = buildContentReader(text, Charset.forName(encoding));
    
    // And a temp writer
    File out = TempFileProvider.createTempFile("AlfrescoTest_", ".pdf");
    ContentWriter writer = new FileContentWriter(out);
    writer.setMimetype("application/pdf");
    
    // Transform to PDF
    transformer.transform(reader, writer);
    
    // Read back in the PDF and check it; the document is closed even if extraction throws
    StringWriter textWriter = new StringWriter();
    try (PDDocument doc = PDDocument.load(out))
    {
        PDFTextStripper textStripper = new PDFTextStripper();
        textStripper.writeText(doc, textWriter);
    }
    
    String roundTrip = clean(textWriter.toString());
    
    assertEquals(
            "Incorrect text in PDF when starting from text in " + encoding,
            checkText, roundTrip
    );
}
 
Example 6
Source File: PdfBoxUtilities.java    From tess4j with Apache License 2.0 5 votes vote down vote up
/**
 * Splits PDF.
 * <p>
 * I/O failures are logged rather than propagated. Every document produced by the
 * splitter is closed, including when an unexpected number of documents is returned
 * or saving fails.
 *
 * @param inputPdfFile input file
 * @param outputPdfFile output file
 * @param firstPage begin page
 * @param lastPage end page
 */
public static void splitPdf(File inputPdfFile, File outputPdfFile, int firstPage, int lastPage) {
    PDDocument document = null;
    List<PDDocument> documents = null;
    try {
        document = PDDocument.load(inputPdfFile);
        Splitter splitter = new Splitter();

        splitter.setStartPage(firstPage);
        splitter.setEndPage(lastPage);
        // a chunk size equal to the requested range, so one output document is expected
        splitter.setSplitAtPage(lastPage - firstPage + 1);

        documents = splitter.split(document);

        if (documents.size() == 1) {
            documents.get(0).save(outputPdfFile);
        } else {
            logger.error("Splitter returned " + documents.size() + " documents rather than expected of 1");
        }
    } catch (IOException ioe) {
        logger.error("Exception splitting PDF => " + ioe);
    } finally {
        // close the split results first, then the source document
        if (documents != null) {
            for (PDDocument part : documents) {
                try {
                    part.close();
                } catch (Exception e) {
                    logger.error("Exception closing split PDF => " + e);
                }
            }
        }
        if (document != null) {
            try {
                document.close();
            } catch (Exception e) {
                logger.error("Exception closing PDF => " + e);
            }
        }
    }
}
 
Example 7
Source File: WaterMarkConverter.java    From workable-converter with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Adds a watermark overlay to the PDF at the request's waiting file path and saves
 * the result to the request's result file path.
 *
 * @param require conversion request carrying the file paths and the watermark settings
 * @return {@code true} when the watermarked PDF has been saved
 * @throws ConvertFailedException if reading, overlaying or saving the PDF fails
 */
@Override
public boolean byFilePath(ConvertRequire require) throws ConvertFailedException {
    // try-with-resources: the source document is closed on success and on failure
    try (PDDocument pdfFile = PDDocument.load(new File(require.getWaitingFilePath()))) {
        HashMap<Integer, String> overlayGuide = new HashMap<>();

        String tmpName = this.getTmpName(require.getWaterMarkRequire());
        int waterMarkPage = require.getWaterMarkRequire().getWaterMarkPage();

        //0 means add watermark in all page
        if (waterMarkPage == 0) {
            // overlay page numbers are 1-based
            for (int i = 1; i <= pdfFile.getNumberOfPages(); i++) {
                overlayGuide.put(i, tmpName);
            }
        } else {
            overlayGuide.put(waterMarkPage, tmpName);
        }
        // NOTE(review): depending on the PDFBox version, Overlay may itself need closing — confirm.
        Overlay overlay = new Overlay();
        overlay.setInputPDF(pdfFile);
        overlay.setOverlayPosition(Overlay.Position.BACKGROUND);
        overlay.overlay(overlayGuide);
        pdfFile.save(require.getResultFilePath());
    } catch (IOException e) {
        // NOTE(review): the cause is dropped here; chain it if ConvertFailedException supports a cause.
        throw new ConvertFailedException(e.getMessage());
    }

    return true;
}
 
Example 8
Source File: TestObjectExtractor.java    From tabula-java with MIT License 5 votes vote down vote up
/**
 * Opens the encrypted sample PDF with the user password and checks that exactly one
 * page can be extracted from it.
 *
 * @throws IOException if the PDF cannot be loaded
 */
@Test
public void testGoodPassword() throws IOException {
    File encryptedPdf = new File("src/test/resources/technology/tabula/encrypted.pdf");
    // try-with-resources: the document is closed even when extraction or the assertion fails
    try (PDDocument pdfDocument = PDDocument.load(encryptedPdf, "userpassword")) {
        ObjectExtractor oe = new ObjectExtractor(pdfDocument);
        List<Page> pages = new ArrayList<>();
        PageIterator pi = oe.extract();
        while (pi.hasNext()) {
            pages.add(pi.next());
        }
        assertEquals(1, pages.size());
    }
}
 
Example 9
Source File: WeatherBackendApplicationTests.java    From cxf-spring-cloud-netflix-docker with MIT License 5 votes vote down vote up
/**
 * Returns the text content of a PDF supplied as raw bytes.
 *
 * @param pdfData the PDF file content
 * @return the text extracted by {@code PDFTextStripper}
 * @throws IOException if the PDF cannot be loaded or read
 */
private static String extractPdfText(byte[] pdfData) throws IOException {
    final ByteArrayInputStream pdfStream = new ByteArrayInputStream(pdfData);
    final PDDocument loaded = PDDocument.load(pdfStream);
    try {
        final PDFTextStripper stripper = new PDFTextStripper();
        return stripper.getText(loaded);
    } finally {
        // always release the document, whether or not extraction succeeded
        loaded.close();
    }
}
 
Example 10
Source File: PdfIntegrationTests.java    From java-wkhtmltopdf-wrapper with MIT License 5 votes vote down vote up
/**
 * Extracts the text of a PDF given as raw bytes.
 *
 * @param pdfBytes the PDF file content
 * @return the text extracted by {@code PDFTextStripper}
 * @throws IOException if the PDF cannot be loaded or read
 */
private String getPdfTextFromBytes(byte[] pdfBytes) throws IOException {
    // try-with-resources: the document is closed even when text extraction throws
    try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(pdfBytes))) {
        return new PDFTextStripper().getText(pdDocument);
    }
}
 
Example 11
Source File: PdfUtils.java    From job with MIT License 5 votes vote down vote up
/**
 * Extracts the text of the PDF read from the given stream. The stream is closed by
 * this method, including when loading the PDF fails.
 *
 * @param input stream providing the PDF content; closed before returning
 * @return the extracted text
 * @throws Exception if the PDF cannot be loaded or its text cannot be extracted
 */
public static String parsePdf2Text(InputStream input) throws Exception {
  // Resources are closed in reverse order on every path, so a failure in load() or
  // in one close() no longer leaks the remaining ones.
  try (InputStream source = input;
      PDDocument doc = PDDocument.load(source)) {
    ByteArrayOutputStream output = new ByteArrayOutputStream();
    // Encode and decode with the same explicit charset so the round trip does not
    // depend on (or lose characters to) the platform default encoding.
    try (OutputStreamWriter writer =
        new OutputStreamWriter(output, java.nio.charset.StandardCharsets.UTF_8)) {
      PDFTextStripper stripper = new PDFTextStripper();
      stripper.writeText(doc, writer);
    }
    // the writer is closed (and therefore flushed) before the bytes are read back
    return new String(output.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
  }
}
 
Example 12
Source File: PdfTools.java    From MyBox with Apache License 2.0 5 votes vote down vote up
/**
 * Renders one page of a PDF file to an image.
 *
 * @param file      the PDF file
 * @param password  password used to open the document
 * @param page      index of the page passed to the renderer
 * @param dpi       rendering resolution
 * @param imageType type of the image to produce
 * @return the rendered image, or {@code null} if loading or rendering fails
 */
public static BufferedImage page2image(File file, String password, int page,
        int dpi, ImageType imageType) {
    // try-with-resources already closes the document; no explicit close() is needed
    try ( PDDocument doc = PDDocument.load(file, password, AppVariables.pdfMemUsage)) {
        PDFRenderer renderer = new PDFRenderer(doc);
        return renderer.renderImageWithDPI(page, dpi, imageType);
    } catch (Exception e) {
        logger.debug(e.toString());
        return null;
    }
}
 
Example 13
Source File: FopIntegrationTest.java    From wildfly-camel with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a Camel route that renders XML to PDF through the FOP component using a
 * custom {@code FopFactory} bound under the name "fopFactory", then checks the text
 * of the PDF file written by the route.
 *
 * @throws Exception if the route or the PDF inspection fails
 */
@Test
public void testFopComponentWithCustomFactory() throws Exception {
    FopFactory fopFactory = FopFactory.newInstance(new URI("/"), FopIntegrationTest.class.getResourceAsStream("/factory.xml"));
    initialContext.bind("fopFactory", fopFactory);

    CamelContext camelctx = new DefaultCamelContext(new JndiBeanRepository());
    camelctx.addRoutes(new RouteBuilder() {
        @Override
        public void configure() throws Exception {
            from("direct:start")
            .to("xslt:template.xsl")
            .setHeader("foo", constant("bar"))
            .to("fop:pdf?fopFactory=#fopFactory")
            .setHeader(Exchange.FILE_NAME, constant("resultB.pdf"))
            .to("file:{{jboss.server.data.dir}}/fop")
            .to("mock:result");
        }
    });

    MockEndpoint mockEndpoint = camelctx.getEndpoint("mock:result", MockEndpoint.class);
    mockEndpoint.expectedMessageCount(1);

    camelctx.start();
    try {
        ProducerTemplate template = camelctx.createProducerTemplate();
        template.sendBody("direct:start", FopIntegrationTest.class.getResourceAsStream("/data.xml"));

        mockEndpoint.assertIsSatisfied();

        String dataDir = System.getProperty("jboss.server.data.dir");
        // try-with-resources: the generated PDF is closed even when an assertion fails
        try (PDDocument document = PDDocument.load(Paths.get(dataDir, "fop", "resultB.pdf").toFile())) {
            String pdfText = extractTextFromDocument(document);
            Assert.assertTrue(pdfText.contains("Project"));
            Assert.assertTrue(pdfText.contains("John Doe"));
        }
    } finally {
        camelctx.close();
        initialContext.unbind("fopFactory");
    }
}
 
Example 14
Source File: PDF2ImageExample.java    From tutorials with MIT License 5 votes vote down vote up
/**
 * Renders every page of a PDF at 300 DPI and writes each page as
 * {@code src/output/pdf-<pageNumber>.<extension>}, with page numbers starting at 1.
 *
 * @param filename  path of the PDF to render
 * @param extension file extension used in the output file names
 * @throws IOException if the PDF cannot be loaded or a page cannot be rendered
 */
private static void generateImageFromPDF(String filename, String extension) throws IOException {
	// try-with-resources: the document is closed even when rendering or writing fails
	try (PDDocument document = PDDocument.load(new File(filename))) {
		PDFRenderer pdfRenderer = new PDFRenderer(document);
		for (int page = 0; page < document.getNumberOfPages(); ++page) {
			BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
			ImageIOUtil.writeImage(bim, String.format("src/output/pdf-%d.%s", page + 1, extension), 300);
		}
	}
}
 
Example 15
Source File: TestPdfFontExtractor.java    From FontVerter with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Extracts fonts from a PDF whose OpenType table length is corrupt and checks that
 * extraction completes and yields the expected number of fonts.
 *
 * @throws IOException if the test PDF cannot be loaded or processed
 */
@Test
public void givenCorruptOverlyLargeOpenTypeTableLength_doesNotThrowOomError() throws IOException {
    // try-with-resources: the document is closed even when extraction or the assertion fails
    try (PDDocument doc = PDDocument.load(
            TestUtils.readTestFile("pdf/corrupt_overly_large_opentype_table_length.pdf"))) {
        PdfFontExtractor extractor = new PdfFontExtractor();

        List<FVFont> fonts = extractor.extractToFVFonts(doc);

        Assert.assertEquals(40, fonts.size());
    }
}
 
Example 16
Source File: PdfBoxDocumentReader.java    From dss with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * The PDFBox implementation of the Reader
 * 
 * @param binaries a byte array of a PDF to read
 * @param passwordProtection {@link String} a password to open a protected document
 * @throws IOException if an exception occurs
 * @throws eu.europa.esig.dss.pades.exception.InvalidPasswordException if the password is not provided or invalid for a protected document
 */
public PdfBoxDocumentReader(byte[] binaries, String passwordProtection) throws IOException, eu.europa.esig.dss.pades.exception.InvalidPasswordException {
	Objects.requireNonNull(binaries, "The document binaries must be defined!");
	try {
		this.pdDocument = PDDocument.load(binaries, passwordProtection);
	} catch (InvalidPasswordException e) {
           throw new eu.europa.esig.dss.pades.exception.InvalidPasswordException(e.getMessage());
	}
}
 
Example 17
Source File: PdfBoxUtilities.java    From tess4j with Apache License 2.0 4 votes vote down vote up
/**
 * Converts PDF to PNG format.
 * <p>
 * Each page is rendered at 300 DPI into a new temporary directory. Errors raised
 * while loading or rendering are logged rather than propagated. If no file was
 * produced, the temporary directory is removed and an empty array is returned.
 *
 * @param inputPdfFile input file
 * @return an array of PNG images sorted by file name; empty if nothing was rendered
 * @throws java.io.IOException if the temporary directory cannot be created
 */
public static File[] convertPdf2Png(File inputPdfFile) throws IOException {
    Path path = Files.createTempDirectory("tessimages");
    File imageDir = path.toFile();

    // try-with-resources closes the document on every path
    try (PDDocument document = PDDocument.load(inputPdfFile)) {
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        for (int page = 0; page < document.getNumberOfPages(); ++page) {
            BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);

            // suffix in filename will be used as the file format
            String filename = String.format("workingimage%04d.png", page + 1);
            ImageIOUtil.writeImage(bim, new File(imageDir, filename).getAbsolutePath(), 300);
        }
    } catch (IOException ioe) {
        logger.error("Error extracting PDF Document => " + ioe);
    } finally {
        // File.list() returns null on I/O error, so guard before reading the length
        String[] contents = imageDir.list();
        if (contents != null && contents.length == 0) {
            imageDir.delete();
        }
    }

    // find working files
    File[] workingFiles = imageDir.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.toLowerCase().matches("workingimage\\d{4}\\.png$");
        }
    });

    // listFiles() returns null when the directory no longer exists (it was deleted
    // above because it was empty) or cannot be read; report "no images" instead of
    // failing with a NullPointerException in Arrays.sort.
    if (workingFiles == null) {
        return new File[0];
    }

    Arrays.sort(workingFiles, new Comparator<File>() {
        @Override
        public int compare(File f1, File f2) {
            return f1.getName().compareTo(f2.getName());
        }
    });

    return workingFiles;
}
 
Example 18
Source File: PDFExtractor.java    From science-parse with Apache License 2.0 4 votes vote down vote up
/**
 * Loads a PDF from the given stream and runs the extraction on the loaded document.
 * The loaded document is closed before this method returns.
 *
 * @param is stream providing the PDF content
 * @return the result of {@code extractResultFromPDDocument} for the loaded document
 */
@SneakyThrows
public PdfDocExtractionResult extractResultFromInputStream(InputStream is) {
  try (PDDocument loadedDocument = PDDocument.load(is)) {
    final PdfDocExtractionResult extraction = extractResultFromPDDocument(loadedDocument);
    return extraction;
  }
}
 
Example 19
Source File: PdfFontExtractor.java    From FontVerter with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Loads a PDF from raw bytes and extracts its fonts to the given directory.
 *
 * @param pdf  the PDF file content
 * @param path the target directory path passed on to the document-based overload
 * @throws IOException if the PDF cannot be loaded or the extraction fails
 */
public void extractFontsToDir(byte[] pdf, String path) throws IOException {
    // try-with-resources: the document is closed even when extraction throws
    try (PDDocument doc = PDDocument.load(pdf)) {
        extractFontsToDir(doc, path);
    }
}
 
Example 20
Source File: PdfViewController.java    From MyBox with Apache License 2.0 4 votes vote down vote up
/**
 * Loads the PDF document on a background task and, once loaded, fills the outline
 * tree with a "Bookmarks" root and the document's outline items.
 * <p>
 * Does nothing when the document info has not been loaded yet or when the outline
 * tree already has a root. Any previously started outline task is cancelled first.
 */
protected void loadOutline() {
    if (!infoLoaded.get() || outlineTree.getRoot() != null) {
        return;
    }
    synchronized (this) {
        if (outlineTask != null) {
            outlineTask.cancel();
        }
        outlineTask = new Task<Void>() {
            protected PDDocument doc;

            @Override
            protected Void call() {
                try {
                    doc = PDDocument.load(sourceFile, password, AppVariables.pdfMemUsage);
                } catch (Exception e) {
                    logger.debug(e.toString());
                }
                return null;
            }

            @Override
            protected void succeeded() {
                super.succeeded();
                outlineTask = null;
                if (doc != null) {
                    Platform.runLater(new Runnable() {
                        @Override
                        public void run() {
                            try {
                                PDDocumentOutline outline = doc.getDocumentCatalog().getDocumentOutline();
                                TreeItem outlineRoot = new TreeItem<>(AppVariables.message("Bookmarks"));
                                outlineRoot.setExpanded(true);
                                outlineTree.setRoot(outlineRoot);
                                if (outline != null) {
                                    loadOutlineItem(outline, outlineRoot);
                                }
                            } catch (Exception e) {
                                logger.debug(e.toString());
                            } finally {
                                // close in finally so a failure while building the tree
                                // does not leave the document open
                                try {
                                    doc.close();
                                } catch (Exception e) {
                                    logger.debug(e.toString());
                                }
                            }
                        }
                    });
                } else {
                    popFailed();
                }
            }

            @Override
            protected void failed() {
                super.failed();
                outlineTask = null;
            }

            @Override
            protected void cancelled() {
                super.cancelled();
                outlineTask = null;
            }
        };
        openHandlingStage(outlineTask, Modality.WINDOW_MODAL);
        Thread thread = new Thread(outlineTask);
        thread.setDaemon(true);
        thread.start();
    }
}