org.apache.pdfbox.pdmodel.PDDocument Java Examples

The following examples show how to use org.apache.pdfbox.pdmodel.PDDocument. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReportTableWithVerticalLines.java    From cat-boot with Apache License 2.0 7 votes vote down vote up
/**
 * Prints all rows held in {@code elements}, one after another, and calls
 * {@code placeBorders} after each printed row.
 *
 * @param document     the document being written to
 * @param stream       the content stream that receives the output
 * @param pageNumber   handed through to {@code printLine}
 * @param startX       x coordinate at which the table starts
 * @param startY       y coordinate at which the first row starts
 * @param allowedWidth horizontal space handed to the row printing and border code
 * @return the y value returned for the last printed row, or {@code startY} when there are no rows
 * @throws IOException           declared for the printing calls made here
 * @throws IllegalStateException if a title is set, because titles are not implemented
 */
@Override
public float print(PDDocument document, PDPageContentStream stream, int pageNumber, float startX, float startY,
                   float allowedWidth) throws IOException {
    if (title != null) {
        throw new IllegalStateException("title not implemented!");
    }

    float currentY = startY;
    float heightSoFar = 0;
    int rowIndex = 0;
    for (ReportElement[] row : elements) {
        // Measured before printing; the offset handed to printLine excludes the current row.
        float rowHeight = getLineHeight(row, allowedWidth) + pdfStyleSheet.getLineDistance();
        currentY = printLine(document, stream, pageNumber, startX, currentY, allowedWidth, row, heightSoFar);

        // The border flags are refreshed for every row before the borders are placed.
        placeFirstBorder = rowIndex == 0;
        placeLastBorder = rowIndex == elements.length - 1;
        placeBorders(stream, startY, currentY, startX, allowedWidth);

        heightSoFar += rowHeight;
        rowIndex++;
    }
    return currentY;
}
 
Example #2
Source File: PdfSplitBatchController.java    From MyBox with Apache License 2.0 6 votes vote down vote up
/**
 * Splits the page range configured in {@code currentParameters} into chunks whose size is
 * derived from {@code filesNumber}, then passes the resulting documents to {@code writeFiles}.
 *
 * @param source the document to split
 * @return whatever {@code writeFiles} returns, or 0 when any exception is caught
 */
private int splitByFilesNumber(PDDocument source) {
    try {
        int pageCount = currentParameters.toPage - currentParameters.fromPage + 1;
        // One extra page per chunk whenever the range does not divide evenly.
        int pagesPerFile = pageCount / filesNumber + (pageCount % filesNumber == 0 ? 0 : 1);

        Splitter pageSplitter = new Splitter();
        pageSplitter.setStartPage(currentParameters.fromPage);  // 1-based
        pageSplitter.setEndPage(currentParameters.toPage);  // 1-based
        pageSplitter.setMemoryUsageSetting(AppVariables.pdfMemUsage);
        pageSplitter.setSplitAtPage(pagesPerFile);

        List<PDDocument> parts = pageSplitter.split(source);
        return writeFiles(parts);
    } catch (Exception e) {
        logger.error(e.toString());
        return 0;
    }
}
 
Example #3
Source File: JPEGFactory.java    From gcs with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Creates a DCT-encoded image XObject from the color part of {@code image} and, when an
 * alpha image is available, attaches it as the soft mask of the result.
 *
 * @param document the document the image XObject is created for
 * @param image    the source image
 * @param quality  quality value handed to the JPEG encoding
 * @param dpi      resolution value handed to the JPEG encoding
 * @return the created image XObject
 * @throws IOException declared for the encoding and XObject creation calls
 */
private static PDImageXObject createJPEG(PDDocument document, BufferedImage image,
                                         float quality, int dpi) throws IOException
{
    // separate color data from the alpha channel (the latter may be absent)
    BufferedImage colorPart = getColorImage(image);
    BufferedImage alphaPart = getAlphaImage(image);

    // encode the color data into an in-memory JPEG
    ByteArrayOutputStream encoded = new ByteArrayOutputStream();
    encodeImageToJPEGStream(colorPart, quality, dpi, encoded);

    // wrap the encoded bytes into the XObject
    int width = colorPart.getWidth();
    int height = colorPart.getHeight();
    int bitsPerComponent = colorPart.getColorModel().getComponentSize(0);
    PDImageXObject jpegXObject = new PDImageXObject(document,
            new ByteArrayInputStream(encoded.toByteArray()),
            COSName.DCT_DECODE, width, height, bitsPerComponent,
            getColorSpaceFromAWT(colorPart));

    if (alphaPart == null)
    {
        return jpegXObject;
    }

    // alpha -> soft mask
    PDImage softMask = JPEGFactory.createFromImage(document, alphaPart, quality);
    jpegXObject.getCOSObject().setItem(COSName.SMASK, softMask);
    return jpegXObject;
}
 
Example #4
Source File: PdfVeryDenseMergeTool.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * Merges every input document in iteration order and saves the result to the given stream.
 * <p>
 * {@code closeDocument()} runs in all cases, including when merging or saving fails.
 * </p>
 *
 * @param outputStream the stream the merged document is saved to
 * @param inputs       the documents to merge
 * @throws IOException declared for the merge and save calls
 */
public void merge(OutputStream outputStream, Iterable<PDDocument> inputs) throws IOException
{
    try
    {
        openDocument();

        for (PDDocument source : inputs)
        {
            merge(source);
        }

        // a content stream that is still open is closed and forgotten before saving
        if (currentContents != null)
        {
            currentContents.close();
            currentContents = null;
        }

        document.save(outputStream);
    }
    finally
    {
        closeDocument();
    }
}
 
Example #5
Source File: PdfBleachSession.java    From DocBleach with MIT License 6 votes vote down vote up
/**
 * Tries every entry of {@code COMMON_PASSWORDS} against the source and returns the first
 * document that {@code testPassword} manages to open. The returned document is protected
 * again with the password that worked.
 *
 * @param source the PDF data to open
 * @return the opened document
 * @throws IOException     declared for the scratch file, password test and protect calls
 * @throws BleachException if none of the common passwords opens the document
 */
private PDDocument getDocument(RandomAccessRead source) throws IOException, BleachException {
  for (String pwd : COMMON_PASSWORDS) {
    ScratchFile scratchFile = new ScratchFile(MEMORY_USAGE_SETTING);
    boolean handedOver = false;
    try {
      PDDocument doc = testPassword(scratchFile, source, pwd);
      if (doc != null) {
        LOGGER.debug("Password was guessed: '{}'", pwd);
        doc.protect(new StandardProtectionPolicy(pwd, pwd, doc.getCurrentAccessPermission()));
        handedOver = true;
        return doc;
      }
    } finally {
      // The scratch file stays open only for a returned document (which presumably still
      // relies on it); a failed guess or an exception releases it here.
      if (!handedOver) {
        scratchFile.close();
      }
    }
  }

  // @TODO: fetch password from config?

  throw new BleachException("PDF is protected with an unknown password");
}
 
Example #6
Source File: ExtractColorText.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/59031734/get-text-color-in-pdfbox">
 * Get text color in PDFBox
 * </a>
 * <p>
 * Runs the {@link ColorTextStripper} on "furzo Sample.pdf", stores the extracted text in
 * the result folder and prints it. The original color text stripper class stems from the
 * answer to
 * <a href="https://stackoverflow.com/questions/21430341/identifying-the-text-based-on-the-output-in-pdf-using-pdfbox">
 * Identifying the text based on the output in PDF using PDFBOX
 * </a>
 * </p>
 *
 * @throws IOException declared for loading, stripping and writing
 */
@Test
public void testExtractFromFurzoSample() throws IOException {
    try (   InputStream pdfStream = getClass().getResourceAsStream("furzo Sample.pdf");
            PDDocument pdf = Loader.loadPDF(pdfStream) ) {
        PDFTextStripper colorStripper = new ColorTextStripper();
        String coloredText = colorStripper.getText(pdf);

        File target = new File(RESULT_FOLDER, "furzo Sample.txt");
        Files.write(target.toPath(), coloredText.getBytes("UTF-8"));

        String[] report = {
                "/// furzo Sample.pdf ///",
                "Stripped text with color:",
                ">>>",
                coloredText,
                "<<<"
        };
        for (String reportLine : report) {
            System.out.println(reportLine);
        }
    }
}
 
Example #7
Source File: AbstractShape.java    From pdfbox-layout with MIT License 6 votes vote down vote up
/**
 * Fills this shape: calls {@code add(...)} for the given position and size, applies the
 * non-stroking color when one is given, fills, and finally notifies the listener.
 *
 * @param pdDocument    the document handed through to {@code add}
 * @param contentStream the content stream that receives the operations
 * @param upperLeft     the upper left position of the shape
 * @param width         the width of the shape
 * @param height        the height of the shape
 * @param color         the fill color; when {@code null} no color is set here
 * @param drawListener  notified after filling; may be {@code null}
 * @throws IOException declared for the content stream calls
 */
@Override
public void fill(PDDocument pdDocument, PDPageContentStream contentStream,
        Position upperLeft, float width, float height, Color color,
        DrawListener drawListener) throws IOException {
    add(pdDocument, contentStream, upperLeft, width, height);

    if (color != null) {
        contentStream.setNonStrokingColor(color);
    }
    CompatibilityHelper.fillNonZero(contentStream);

    if (drawListener == null) {
        return;
    }
    drawListener.drawn(this, upperLeft, width, height);
}
 
Example #8
Source File: ExtractMarkedContent.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/54956720/how-to-replace-a-space-with-a-word-while-extract-the-data-from-pdf-using-pdfbox">
 * How to replace a space with a word while extract the data from PDF using PDFBox
 * </a>
 * <br/>
 * <a href="https://drive.google.com/open?id=10ZkdPlGWzMJeahwnQPzE6V7s09d1nvwq">
 * test.pdf
 * </a> as "testWPhromma.pdf"
 * <p>
 * This test shows how to, in principle, extract tagged text: the marked content of
 * every page is collected by MCID and then handed, together with the structure tree
 * root, to {@code showStructure}.
 * </p>
 *
 * @throws IOException declared for loading and processing the document
 */
@Test
public void testExtractTestWPhromma() throws IOException {
    System.out.printf("\n\n===\n%s\n===\n", "testWPhromma.pdf");
    // The document is a managed resource as well, so it is closed even when extraction fails.
    try (   InputStream resource = getClass().getResourceAsStream("testWPhromma.pdf");
            PDDocument document = Loader.loadPDF(resource)) {
        Map<PDPage, Map<Integer, PDMarkedContent>> markedContents = new HashMap<>();

        for (PDPage page : document.getPages()) {
            PDFMarkedContentExtractor extractor = new PDFMarkedContentExtractor();
            extractor.processPage(page);

            // index this page's marked content by MCID
            Map<Integer, PDMarkedContent> theseMarkedContents = new HashMap<>();
            markedContents.put(page, theseMarkedContents);
            for (PDMarkedContent markedContent : extractor.getMarkedContents()) {
                theseMarkedContents.put(markedContent.getMCID(), markedContent);
            }
        }

        PDStructureNode root = document.getDocumentCatalog().getStructureTreeRoot();
        showStructure(root, markedContents);
    }
}
 
Example #9
Source File: AddTextWithDynamicFonts.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a one-page PDF showing {@code content}, with the text split by {@code fontify}
 * across the two loaded Noto Sans fonts, and returns the saved bytes.
 *
 * @param content the text to show
 * @return the stream the document was saved to
 * @throws IOException declared for font loading, drawing and saving
 * @see #testAddLikeCccompanyImproved()
 */
private static ByteArrayOutputStream generatePdfFromStringImproved(String content) throws IOException {
    try (   PDDocument doc = new PDDocument();
            InputStream notoSansRegularResource = AddTextWithDynamicFonts.class.getResourceAsStream("NotoSans-Regular.ttf");
            InputStream notoSansCjkRegularResource = AddTextWithDynamicFonts.class.getResourceAsStream("NotoSansCJKtc-Regular.ttf")   ) {
        PDType0Font latinFont = PDType0Font.load(doc, notoSansRegularResource);
        PDType0Font cjkFont = PDType0Font.load(doc, notoSansCjkRegularResource);
        List<PDFont> candidateFonts = Arrays.asList(latinFont, cjkFont);
        List<TextWithFont> textRuns = fontify(candidateFonts, content);

        PDPage singlePage = new PDPage();
        doc.addPage(singlePage);

        try (   PDPageContentStream canvas = new PDPageContentStream(doc, singlePage)) {
            canvas.beginText();
            for (TextWithFont run : textRuns) {
                run.show(canvas, 12);
            }
            canvas.endText();
        }

        ByteArrayOutputStream pdfBytes = new ByteArrayOutputStream();
        doc.save(pdfBytes);
        return pdfBytes;
    }
}
 
Example #10
Source File: PdfBoxUtilities.java    From tess4j with Apache License 2.0 6 votes vote down vote up
/**
 * Gets PDF Page Count.
 * <p>
 * The document is closed before returning; a failure while closing is ignored.
 * </p>
 *
 * @param inputPdfFile input file
 * @return number of pages, or -1 when loading the file fails with an {@code IOException}
 */
public static int getPdfPageCount(File inputPdfFile) {
    PDDocument pdf = null;
    int pageCount;
    try {
        pdf = PDDocument.load(inputPdfFile);
        pageCount = pdf.getNumberOfPages();
    } catch (IOException ioe) {
        logger.error("Error counting PDF pages => " + ioe);
        pageCount = -1;
    } finally {
        if (pdf != null) {
            try {
                pdf.close();
            } catch (Exception e) {
                // best effort: a close failure must not change the result
            }
        }
    }
    return pageCount;
}
 
Example #11
Source File: ExtractImages.java    From bluima with Apache License 2.0 6 votes vote down vote up
/**
 * Writes every image found in the page resources of the test PDF to the "target"
 * directory, naming the files by a running number plus the image suffix.
 *
 * @throws IOException declared for loading the document and writing the images
 */
@Test
public void testPdfBox() throws IOException {

    File pdfFile = new File(PdfHelper.PDF_TEST_RESOURCES + "pdf/1.pdf");
    File outDir = new File("target");

    PDDocument document = PDDocument.load(pdfFile);
    try {
        @SuppressWarnings("unchecked")
        List<PDPage> pages = document.getDocumentCatalog().getAllPages();
        int imageId = 0;
        for (PDPage page : pages) {
            for (PDXObjectImage img : page.getResources().getImages().values()) {

                System.out.println(img.getCOSStream().toString());

                img.write2file(new File(outDir, imageId++ + "."
                        + img.getSuffix()));
            }
        }
    } finally {
        // release the document even when image extraction fails
        document.close();
    }
}
 
Example #12
Source File: CreateSignature.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * <a href="http://stackoverflow.com/questions/41767351/create-pkcs7-signature-from-file-digest">
 * Create pkcs7 signature from file digest
 * </a>
 * <p>
 * A minimal signing frame work merely requiring a {@link SignatureInterface}
 * instance: a signature dictionary with fixed example values is added, the document
 * is saved incrementally for external signing, and the bytes returned by the
 * {@link SignatureInterface} are set as the signature.
 * </p>
 *
 * @param document           the document to sign
 * @param output             the stream the incremental save is written to
 * @param signatureInterface creates the signature bytes for the content to sign
 * @throws IOException declared for the save and signing calls
 */
void sign(PDDocument document, OutputStream output, SignatureInterface signatureInterface) throws IOException
{
    PDSignature pdSignature = new PDSignature();
    pdSignature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE);
    pdSignature.setSubFilter(PDSignature.SUBFILTER_ADBE_PKCS7_DETACHED);
    pdSignature.setName("Example User");
    pdSignature.setLocation("Los Angeles, CA");
    pdSignature.setReason("Testing");
    pdSignature.setSignDate(Calendar.getInstance());
    document.addSignature(pdSignature);

    ExternalSigningSupport signingSupport = document.saveIncrementalForExternalSigning(output);
    // have the content signed externally and feed the resulting bytes straight back
    signingSupport.setSignature(signatureInterface.sign(signingSupport.getContent()));
}
 
Example #13
Source File: CreateEmptyPdf.java    From blog-codes with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a PDF with a single blank page and saves it to a fixed path.
 *
 * @param args unused
 * @throws IOException declared for saving and closing the document
 */
public static void main(String[] args) throws IOException {
	// Create a new empty document; try-with-resources makes sure it is
	// closed even when saving fails.
	try (PDDocument document = new PDDocument()) {
		// Create a new blank page and add it to the document
		PDPage blankPage = new PDPage();
		document.addPage( blankPage );

		// Save the newly created document
		document.save("/home/lili/data/BlankPage.pdf");
	}
}
 
Example #14
Source File: ExtractTextExample.java    From blog-codes with Apache License 2.0 6 votes vote down vote up
/**
 * Prints the text of a fixed PDF file line by line. Nothing is printed when the
 * document reports itself as encrypted.
 *
 * @param args unused
 * @throws InvalidPasswordException declared for loading the document
 * @throws IOException              declared for loading and text extraction
 */
public static void main(String[] args) throws InvalidPasswordException, IOException {
    try (PDDocument document = PDDocument.load(new File("/home/lili/data/test.pdf"))) {
        if (!document.isEncrypted()) {
            PDFTextStripper tStripper = new PDFTextStripper();
            // To extract a single page or a range of pages, narrow the range below.
            // Currently all pages are extracted; page numbers are 1-based.
            tStripper.setStartPage(1);
            tStripper.setEndPage(document.getNumberOfPages());
            String pdfFileInText = tStripper.getText(document);
            String[] lines = pdfFileInText.split("\\r?\\n");
            for (String line : lines) {
                System.out.println(line);
            }
        }
    }
}
 
Example #15
Source File: CreateMultipleVisualizations.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/52829507/multiple-esign-using-pdfbox-2-0-12-java">
 * Multiple esign using pdfbox 2.0.12 java?
 * </a>
 * <p>
 * This test demonstrates how to create a single signature in multiple signature
 * fields with one widget annotation each only referenced from a single page each
 * only. (Actually there is an extra invisible signature; it is possible to get
 * rid of it with some more code.)
 * </p>
 *
 * @throws IOException declared for loading, signing and saving
 */
@Test
public void testCreateSignatureWithMultipleVisualizations() throws IOException {
    try (   InputStream resource = getClass().getResourceAsStream("/mkl/testarea/pdfbox2/analyze/test-rivu.pdf");
            OutputStream result = new FileOutputStream(new File(RESULT_FOLDER, "testSignedMultipleVisualizations.pdf"));
            PDDocument pdDocument = Loader.loadPDF(resource)   )
    {
        // make sure there is a form to put the signature fields into
        PDAcroForm form = pdDocument.getDocumentCatalog().getAcroForm();
        if (form == null) {
            form = new PDAcroForm(pdDocument);
            pdDocument.getDocumentCatalog().setAcroForm(form);
        }
        form.setSignaturesExist(true);
        form.setAppendOnly(true);
        form.getCOSObject().setDirect(true);

        PDSignature pdSignature = new PDSignature();
        pdSignature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE);
        pdSignature.setSubFilter(PDSignature.SUBFILTER_ADBE_PKCS7_DETACHED);
        pdSignature.setName("Example User");
        pdSignature.setLocation("Los Angeles, CA");
        pdSignature.setReason("Testing");
        pdSignature.setSignDate(Calendar.getInstance());
        pdDocument.addSignature(pdSignature, this);

        // the same rectangle and signature are used for the field on every page
        PDRectangle fieldArea = new PDRectangle(100, 600, 300, 100);
        for (PDPage currentPage : pdDocument.getPages()) {
            addSignatureField(pdDocument, currentPage, fieldArea, pdSignature);
        }

        pdDocument.saveIncremental(result);
    }
}
 
Example #16
Source File: PDImageXObject.java    From gcs with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Creates a COS stream from raw (encoded) data.
 *
 * @param document the document whose COS document creates the stream
 * @param rawInput the data copied unchanged into the stream's raw output
 * @return the filled stream
 * @throws IOException declared for creating, copying to and closing the raw output
 */
private static COSStream createRawStream(PDDocument document, InputStream rawInput)
        throws IOException
{
    COSStream cosStream = document.getDocument().createCOSStream();
    // Opening happens outside the try: when it fails there is nothing to close.
    OutputStream rawOutput = cosStream.createRawOutputStream();
    try
    {
        IOUtils.copy(rawInput, rawOutput);
    }
    finally
    {
        rawOutput.close();
    }
    return cosStream;
}
 
Example #17
Source File: FillInForm.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/56938135/pdfbox-inconsistent-pdtextfield-autosize-behavior-after-setvalue">
 * PDFBox Inconsistent PDTextField Autosize Behavior after setValue
 * </a>
 * <br/>
 * <a href="http://www.filedropper.com/0postfontload">
 * 0.pdf
 * </a>
 * <p>
 * Indeed, some fields look weird after fill-in; for some fields
 * this is due to weird pre-existing appearance streams. These can
 * be fixed as in {@link #testFill0DropOldAppearance()}.
 * </p>
 * @throws IOException declared for loading, filling and saving
 * @see #testFill0DropOldAppearance()
 * @see #testFill0DropOldAppearanceNoCombNoMax()
 * @see #testFill0DropOldAppearanceNoCombNoMaxNoMultiLine()
 */
@Test
public void testFill0LikeXenyal() throws IOException {
    // The document is managed by try-with-resources so that it is closed even when
    // filling or saving fails.
    try (   InputStream originalStream = getClass().getResourceAsStream("0.pdf");
            InputStream fontStream = getClass().getResourceAsStream("Lato-Regular.ttf");
            PDDocument doc = Loader.loadPDF(originalStream)   )
    {
        PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm();

        PDType0Font font = PDType0Font.load(doc, fontStream, false);
        String font_name = acroForm.getDefaultResources().add(font).getName();

        // give every text field the loaded font with size 0 in its default appearance
        for (PDField field : acroForm.getFieldTree()) {
            if (field instanceof PDTextField) {
                PDTextField textField = (PDTextField) field;
                textField.setDefaultAppearance(String.format("/%s 0 Tf 0 g", font_name));
                textField.setValue("Test");
            }
        }

        doc.save(new File(RESULT_FOLDER, "0-filledLikeXenyal.pdf"));
    }
}
 
Example #18
Source File: PlayWithHelloSign.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * <a href="http://stackoverflow.com/questions/41071142/pdfbox-remove-a-single-field-from-pdf">
 * PDFBox: Remove a single field from PDF
 * </a>
 * <br/>
 * <a href="https://www.dropbox.com/s/oyv1vjyhkmao1t1/input.pdf?dl=0">
 * input.pdf
 * </a>
 * <p>
 * This method applies the {@link HelloSignManipulator} to the sample document
 * and clears the field <code>var1001</code> (<i>address1</i>).
 * </p>
 *
 * @throws IOException declared for loading, manipulating and saving
 */
@Test
public void testClearAddress1Input() throws IOException
{
    try (   InputStream resource = getClass().getResourceAsStream("input.pdf");
            PDDocument pdDocument = Loader.loadPDF(resource)   )
    {
        // the manipulator works on top of an analyzer created for the loaded document
        HelloSignManipulator manipulator = new HelloSignManipulator(new HelloSignAnalyzer(pdDocument));
        manipulator.clearFields(Collections.singleton("var1001"));

        File target = new File(RESULT_FOLDER, "input-clear-address1.pdf");
        pdDocument.save(target);
    }
}
 
Example #19
Source File: Frame.java    From pdfbox-layout with MIT License 5 votes vote down vote up
/**
 * Draws the frame: first the shape (background fill, then border) when there is one,
 * then all inner drawables stacked below one another.
 *
 * @param pdDocument    the document handed through to the shape and the inner drawables
 * @param contentStream the content stream that receives the output
 * @param upperLeft     the upper left position of the frame including its margin
 * @param drawListener  handed through to the shape and the inner drawables
 * @throws IOException declared for the drawing calls
 */
@Override
public void draw(PDDocument pdDocument, PDPageContentStream contentStream,
        Position upperLeft, DrawListener drawListener) throws IOException {
    setInnerMaxWidthIfNecessary();

    float halfBorderWidth = getBorderWidth() > 0 ? getBorderWidth() / 2f : 0f;

    // move inside the margin, onto the middle of the border line
    upperLeft = upperLeft.add(getMarginLeft() + halfBorderWidth,
            -getMarginTop() - halfBorderWidth);

    if (getShape() != null) {
        float shapeWidth = getWidth() - getMarginLeft() - getMarginRight()
                - getBorderWidth();
        float shapeHeight = getHeight() - getMarginTop()
                - getMarginBottom() - getBorderWidth();

        if (getBackgroundColor() != null) {
            getShape().fill(pdDocument, contentStream, upperLeft,
                    shapeWidth, shapeHeight, getBackgroundColor(),
                    drawListener);
        }
        if (hasBorder()) {
            getShape().draw(pdDocument, contentStream, upperLeft,
                    shapeWidth, shapeHeight, getBorderColor(),
                    getBorderStroke(), drawListener);
        }
    }

    // the content starts inside the padding and the remaining half of the border
    Position cursor = upperLeft.add(getPaddingLeft() + halfBorderWidth,
            -getPaddingTop() - halfBorderWidth);
    for (Drawable child : innerList) {
        child.draw(pdDocument, contentStream, cursor, drawListener);
        cursor = cursor.add(0, -child.getHeight());
    }
}
 
Example #20
Source File: TestObjectExtractor.java    From tabula-java with MIT License 5 votes vote down vote up
/**
 * Checks that the second page of the two-page sample document can be extracted.
 *
 * @throws IOException declared for loading the document and extracting the page
 */
@Test
public void testExtractOnePage() throws IOException {
    // try-with-resources closes the document even when an assertion fails
    try (PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"))) {
        assertEquals(2, pdf_document.getNumberOfPages());

        ObjectExtractor oe = new ObjectExtractor(pdf_document);
        Page page = oe.extract(2);

        assertNotNull(page);
    }
}
 
Example #21
Source File: PDType1Font.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Creates a new Type 1 font for embedding.
 * <p>
 * The encoding, glyph list and Type 1 font are all taken from the embedder created
 * for the given PFB stream.
 * </p>
 *
 * @param doc PDF document to write to
 * @param pfbIn PFB file stream
 * @throws IOException declared for the embedder creation
 */
public PDType1Font(PDDocument doc, InputStream pfbIn) throws IOException
{
    PDType1FontEmbedder fontEmbedder = new PDType1FontEmbedder(doc, dict, pfbIn, null);

    // values provided by the embedder
    type1font = fontEmbedder.getType1Font();
    genericFont = fontEmbedder.getType1Font();
    encoding = fontEmbedder.getFontEncoding();
    glyphList = fontEmbedder.getGlyphList();

    // fixed initial state of a font created this way
    isEmbedded = true;
    isDamaged = false;
    fontMatrixTransform = new AffineTransform();
    codeToBytesMap = new HashMap<Integer,byte[]>();
}
 
Example #22
Source File: DSS1444Test.java    From dss with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Loading a JPEG as a PDF document must fail with the expected {@code IOException} message.
 *
 * @throws IOException declared for closing the resource stream
 */
@Test
public void test3() throws IOException {
	try (InputStream jpegStream = getClass().getResourceAsStream("/small-red.jpg")) {
		IOException failure = assertThrows(IOException.class, () -> PDDocument.load(jpegStream));
		assertEquals("Error: End-of-File, expected line", failure.getMessage());
	}
}
 
Example #23
Source File: PDFPresentation.java    From Quelea with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Make the slides that go in this PDF, this is what takes time and should
 * only be done once.
 * <p>
 * One slide is created per page, numbered from 1. The document is closed before
 * returning, also when creating the slides fails.
 * </p>
 *
 * @return all the slides.
 * @throws IOException declared for loading the document and creating the temp directory
 */
private PdfSlide[] makeSlides() throws IOException {
    File pdf = new File(file);
    try (PDDocument document = PDDocument.load(pdf.getAbsoluteFile())) {
        // NOTE(review): this temp directory is created and registered for deletion but not
        // used further in this method; kept to preserve the original side effect.
        Path f = Files.createTempDirectory(null);
        f.toFile().deleteOnExit();
        ArrayList<PdfSlide> ret = new ArrayList<>();
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        int totalPages = document.getNumberOfPages();
        for (int i = 0; i < totalPages; i++) {
            ret.add(new PdfSlide(i + 1, pdfRenderer));
        }
        return ret.toArray(new PdfSlide[ret.size()]);
    }
}
 
Example #24
Source File: ListFormFields.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/44817793/the-method-getkids-is-undefined-for-the-type-pdfield">
 * The method getKids() is undefined for the type PDField
 * </a>
 * <br/>
 * <a href="https://issues.apache.org/jira/secure/attachment/12651245/field%20name%20test.pdf">
 * field name test.pdf
 * </a>
 * <p>
 * The problems referred to don't exist anymore. For each top level terminal field the
 * name and its widgets are printed, for each non-terminal field the names of its
 * direct children.
 * </p>
 *
 * @throws InvalidPasswordException declared for loading the document
 * @throws IOException              declared for loading the document
 */
@Test
public void testListFieldsInFieldNameTest() throws InvalidPasswordException, IOException
{
    // try-with-resources closes the document even when listing the fields fails
    try (PDDocument doc = Loader.loadPDF(getClass().getResourceAsStream("field name test.pdf")))
    {
        PDAcroForm form = doc.getDocumentCatalog().getAcroForm();
        for (PDField f : form.getFields())
        {
            if (f instanceof PDTerminalField)
            {
                System.out.printf("%s, %s widgets\n", f.getFullyQualifiedName(), f.getWidgets().size());
                for (PDAnnotationWidget widget : f.getWidgets())
                {
                    System.out.printf("  %s\n", widget.getAnnotationName());
                }
            }
            else if (f instanceof PDNonTerminalField)
            {
                for (PDField kidField : ((PDNonTerminalField) f).getChildren())
                {
                    // the elements are already typed as PDField, only null needs to be skipped
                    if (kidField != null)
                    {
                        System.out.println(kidField.getFullyQualifiedName());
                    }
                }
            }
        }
    }
}
 
Example #25
Source File: TestObjectExtractor.java    From tabula-java with MIT License 5 votes vote down vote up
/**
 * Checks that an encrypted document opened with the right password yields one page.
 *
 * @throws IOException declared for loading the document and extracting the pages
 */
@Test
public void testGoodPassword() throws IOException {
    // try-with-resources closes the document even when an assertion fails
    try (PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword")) {
        ObjectExtractor oe = new ObjectExtractor(pdf_document);
        List<Page> pages = new ArrayList<>();
        PageIterator pi = oe.extract();
        while (pi.hasNext()) {
            pages.add(pi.next());
        }
        assertEquals(1, pages.size());
    }
}
 
Example #26
Source File: WeatherBackendApplicationTests.java    From cxf-spring-cloud-netflix-docker with MIT License 5 votes vote down vote up
/**
 * Extracts all the Text inside a Pdf
 *
 * @param pdfData the bytes of the PDF to read
 * @return the text returned by a {@code PDFTextStripper} for the whole document
 * @throws IOException declared for loading, text extraction and closing
 */
private static String extractPdfText(byte[] pdfData) throws IOException {
    ByteArrayInputStream pdfBytes = new ByteArrayInputStream(pdfData);
    PDDocument parsedPdf = PDDocument.load(pdfBytes);
    try {
        PDFTextStripper textStripper = new PDFTextStripper();
        return textStripper.getText(parsedPdf);
    } finally {
        // always release the document, whether or not the extraction succeeded
        parsedPdf.close();
    }
}
 
Example #27
Source File: AlterPDFParser.java    From tika-server with Apache License 2.0 5 votes vote down vote up
/**
 * Invokes the method {@code shouldHandleXFAOnly(boolean, PDFParserConfig)} declared in the
 * direct superclass via reflection, passing the result of {@code checkDocHasXFA}.
 *
 * @param pdDocument the document checked for XFA
 * @param config     the parser configuration handed to the superclass method
 * @return the boolean returned by the superclass method
 * @throws NoSuchMethodException     if the superclass declares no such method
 * @throws InvocationTargetException if the invoked method throws
 * @throws IllegalAccessException    if the method cannot be accessed
 */
private boolean callShouldHandleXFAOnly(PDDocument pdDocument, PDFParserConfig config)
        throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
    boolean hasXfa = this.checkDocHasXFA(pdDocument);

    Class<?> parentClass = getClass().getSuperclass();
    Method hiddenMethod = parentClass.getDeclaredMethod("shouldHandleXFAOnly",
            boolean.class, PDFParserConfig.class);
    // the target may not be accessible from here, so lift the access check
    hiddenMethod.setAccessible(true);

    Object answer = hiddenMethod.invoke(this, hasXfa, config);
    return (boolean) answer;
}
 
Example #28
Source File: TestObjectExtractor.java    From tabula-java with MIT License 5 votes vote down vote up
/**
 * Checks that the first page of the sample document contains each of its text elements.
 *
 * @throws IOException declared for loading the document and extracting the page
 */
@Test
public void testTextElementsContainedInPage() throws IOException {
    // try-with-resources closes the document even when an assertion fails
    try (PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf"))) {
        ObjectExtractor oe = new ObjectExtractor(pdf_document);

        Page page = oe.extractPage(1);

        for (TextElement te : page.getText()) {
            assertTrue(page.contains(te));
        }
    }
}
 
Example #29
Source File: PdfDenseMergeTool.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Merges each page of the given document, in page order, via the page-level
 * {@code merge(PDDocument, PDPage)}.
 *
 * @param input the document whose pages are merged
 * @throws IOException declared for the page-level merge
 */
void merge(PDDocument input) throws IOException
{
    for (PDPage sourcePage : input.getPages())
    {
        merge(input, sourcePage);
    }
}
 
Example #30
Source File: JoinPages.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/47295391/create-a-one-page-pdf-from-two-pdfs-using-pdfbox">
 * create a one page PDF from two PDFs using PDFBOX
 * </a>
 * <p>
 * This test shows how to join two pages into one putting one above the other.
 * </p>
 *
 * @throws IOException declared for preparing, joining and saving the documents
 */
@Test
public void testJoinSmallAndBig() throws IOException {
    try (   PDDocument joined = new PDDocument();
            PDDocument upperSource = prepareSmallPdf();
            PDDocument lowerSource = prepareBiggerPdf()) {
        join(joined, upperSource, lowerSource);

        File target = new File(RESULT_FOLDER, "joinedPage.pdf");
        joined.save(target);
    }
}