Java Code Examples for org.apache.pdfbox.pdmodel.PDPageTree#getCount()

The following examples show how to use org.apache.pdfbox.pdmodel.PDPageTree#getCount(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PdfTools.java    From MyBox with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the image XObjects referenced by the page resources of
 * {@code document}, scanning pages from the zero-based index
 * {@code startPage} up to the last page.
 *
 * @param document  the document to scan; {@code null} yields an empty list
 * @param startPage zero-based index of the first page to scan; {@code null}
 *                  is treated as {@code 0}
 * @return the images found, in page order; empty when {@code document} is
 *         {@code null} or {@code startPage} is not below the page count
 * @throws Exception if an underlying PDFBox call fails
 */
public static List<PDImageXObject> getImageListFromPDF(PDDocument document,
        Integer startPage) throws Exception {
    List<PDImageXObject> imageList = new ArrayList<>();
    if (document == null) {
        return imageList;
    }
    PDPageTree pages = document.getPages();
    // Unbox once into a local instead of reassigning the boxed parameter.
    int firstPage = startPage == null ? 0 : startPage;
    int len = pages.getCount();
    // The loop condition already covers the "start beyond last page" case.
    for (int i = firstPage; i < len; ++i) {
        PDPage page = pages.get(i);
        // A page is not guaranteed to carry a resources dictionary; skip it
        // rather than failing the whole scan with a NullPointerException.
        if (page.getResources() == null) {
            continue;
        }
        for (COSName imageObjectName : page.getResources().getXObjectNames()) {
            if (page.getResources().isImageXObject(imageObjectName)) {
                imageList.add((PDImageXObject) page.getResources().getXObject(imageObjectName));
            }
        }
    }
    return imageList;
}
 
Example 2
Source File: PdfScreenshotUtils.java    From dss with GNU Lesser General Public License v2.1 6 votes vote down vote up
/**
 * Asserts that two documents have the same page count and that, for every
 * page index, the renderings of both documents at {@code DPI} reach at least
 * {@code minSimilarity} as measured by {@code checkImageSimilarity}.
 *
 * @param document1     the sample document
 * @param document2     the document checked against the sample
 * @param minSimilarity lowest similarity value accepted for any page
 * @throws IOException if rendering a page fails
 */
public static void checkPdfSimilarity(PDDocument document1, PDDocument document2, float minSimilarity) throws IOException {
	final PDPageTree referencePages = document1.getPages();
	final PDPageTree candidatePages = document2.getPages();

	assertEquals(candidatePages.getCount(), referencePages.getCount());

	final PDFRenderer referenceRenderer = new PDFRenderer(document1);
	final PDFRenderer candidateRenderer = new PDFRenderer(document2);

	int pageNumber = 0;
	while (pageNumber < candidatePages.getCount()) {
		final BufferedImage referenceImage = referenceRenderer.renderImageWithDPI(pageNumber, DPI);
		final BufferedImage candidateImage = candidateRenderer.renderImageWithDPI(pageNumber, DPI);

		// Debugging aid: dump both renderings to target/ as PNG files
		// (e.g. via ImageIO.write) to inspect a failing page by eye.

		final float checkSimilarity = checkImageSimilarity(referenceImage, candidateImage, CHECK_RESOLUTION);
		final String failureMessage =
				"The image similarity " + checkSimilarity + " is lower the allowed limit " + minSimilarity;
		assertTrue(checkSimilarity >= minSimilarity, failureMessage);

		pageNumber++;
	}
}
 
Example 3
Source File: PdfContentImagePreprocessor.java    From tika-server with Apache License 2.0 5 votes vote down vote up
/**
 * Hands the resources of every page of {@code document} to
 * {@code processImagesFromResources}, page by page.
 * <p>
 * Any exception raised along the way is caught and its stack trace printed;
 * nothing is propagated to the caller.
 * </p>
 */
private void removeImagesAlphaChannelUnsafe() {
    try {
        final PDPageTree pageTree = document.getDocumentCatalog().getPages();
        int pageIndex = 0;
        while (pageIndex < pageTree.getCount()) {
            final PDPage currentPage = pageTree.get(pageIndex);
            processImagesFromResources(currentPage.getResources());
            pageIndex++;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 4
Source File: PdfContentTypeChecker.java    From tika-server with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a fresh {@code PDFTextStripper}, records the page count of
 * {@code document} and, for each page in order, calls
 * {@code readObjectsOnPage} followed by {@code calculateTextLengthOnPage}
 * with the one-based page number.
 * <p>
 * Exceptions thrown while walking the pages are caught and their stack
 * trace printed; only the stripper construction happens outside that guard.
 * </p>
 *
 * @param document the document to analyse
 * @throws IOException declared for the stripper construction
 */
private void calculateObjectsInDocument(PDDocument document) throws IOException {
    this.pdfTextStripper = new PDFTextStripper();

    try {
        final PDPageTree pageTree = document.getDocumentCatalog().getPages();
        this.pageCount = pageTree.getCount();

        // Iterate by one-based page number; the tree itself is zero-based.
        for (int pageNumber = 1; pageNumber <= pageTree.getCount(); pageNumber++) {
            final PDPage currentPage = pageTree.get(pageNumber - 1);
            readObjectsOnPage(currentPage);
            calculateTextLengthOnPage(document, pageNumber);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 5
Source File: DetermineWidgetPage.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Finds the page whose annotation list contains an annotation whose COS
 * dictionary equals the COS dictionary of {@code widget}.
 *
 * @param document the document whose pages are searched in order
 * @param widget   the widget annotation to locate
 * @return the zero-based index of the first matching page, or {@code -1}
 *         when no page matches
 * @throws IOException if the annotations of a page cannot be read
 */
int determineSafe(PDDocument document, PDAnnotationWidget widget) throws IOException
{
    final COSDictionary wanted = widget.getCOSObject();
    final PDPageTree pageTree = document.getPages();

    int pageIndex = 0;
    while (pageIndex < pageTree.getCount())
    {
        for (PDAnnotation candidate : pageTree.get(pageIndex).getAnnotations())
        {
            final COSDictionary candidateObject = candidate.getCOSObject();
            if (candidateObject.equals(wanted))
            {
                return pageIndex;
            }
        }
        pageIndex++;
    }
    return -1;
}
 
Example 6
Source File: RemoveStrikeoutComment.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/45812696/pdfbox-delete-comment-maintain-strikethrough">
 * PDFBox delete comment maintain strikethrough
 * </a>
 * <br/>
 * <a href="https://expirebox.com/files/3d955e6df4ca5874c38dbf92fc43b5af.pdf">
 * only_fields.pdf
 * </a>
 * <a href="https://file.io/DTvqhC">
 * (alternative download)
 * </a>
 * <p>
 * Due to a bug in the <code>COSArrayList</code> usage for page annotations,
 * the indirect reference to the annotation in question is not removed from
 * the actual page annotations array.
 * </p>
 * <p>
 * The test removes, on every page, all annotations whose contents equal
 * "Sample Strikethrough" from the page's annotation list and saves the
 * result into {@code RESULT_FOLDER}. The loaded document is closed when the
 * test finishes, whether or not saving succeeds.
 * </p>
 *
 * @throws IOException if loading, reading annotations or saving fails
 */
@Test
public void testRemoveLikeStephan() throws IOException {
    // Both the stream and the document are managed here so that neither
    // leaks when an exception is thrown part-way through.
    try (InputStream resource = getClass().getResourceAsStream("only_fields.pdf");
            PDDocument document = Loader.loadPDF(resource)) {
        PDPageTree allPages = document.getDocumentCatalog().getPages();

        for (int i = 0; i < allPages.getCount(); i++) {
            PDPage page = allPages.get(i);
            List<PDAnnotation> annotations = page.getAnnotations();
            if (annotations.isEmpty()) {
                continue;
            }

            // Collect first, remove afterwards: the list must not be
            // modified while it is being iterated.
            List<PDAnnotation> annotationToRemove = new ArrayList<>();
            for (PDAnnotation annotation : annotations) {
                if ("Sample Strikethrough".equals(annotation.getContents())) {
                    annotationToRemove.add(annotation);
                }
            }
            annotations.removeAll(annotationToRemove);
        }

        document.save(new File(RESULT_FOLDER, "only_fields-removeLikeStephan.pdf"));
    }
}
 
Example 7
Source File: RemoveStrikeoutComment.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/45812696/pdfbox-delete-comment-maintain-strikethrough">
 * PDFBox delete comment maintain strikethrough
 * </a>
 * <br/>
 * <a href="https://expirebox.com/files/3d955e6df4ca5874c38dbf92fc43b5af.pdf">
 * only_fields.pdf
 * </a>
 * <a href="https://file.io/DTvqhC">
 * (alternative download)
 * </a>
 * <p>
 * The OP only wanted the comment removed, not the strike-through. Thus, we must
 * not remove the annotation but merely the comment building attributes.
 * </p>
 * <p>
 * For every annotation of subtype "StrikeOut" the test removes the Popup,
 * Contents, RC and T entries from the annotation dictionary, remembers the
 * popup object, and then removes the remembered objects from the page's
 * annotation list. The result is saved into {@code RESULT_FOLDER}, and the
 * loaded document is closed when the test finishes.
 * </p>
 *
 * @throws IOException if loading, reading annotations or saving fails
 */
@Test
public void testRemoveLikeStephanImproved() throws IOException {
    final COSName POPUP = COSName.getPDFName("Popup");
    // Both the stream and the document are managed here so that neither
    // leaks when an exception is thrown part-way through.
    try (InputStream resource = getClass().getResourceAsStream("only_fields.pdf");
            PDDocument document = Loader.loadPDF(resource)) {
        PDPageTree allPages = document.getDocumentCatalog().getPages();

        // Accumulates across pages, exactly as before.
        List<COSObjectable> objectsToRemove = new ArrayList<>();

        for (int i = 0; i < allPages.getCount(); i++) {
            PDPage page = allPages.get(i);
            List<PDAnnotation> annotations = page.getAnnotations();

            for (PDAnnotation annotation : annotations) {
                if ("StrikeOut".equals(annotation.getSubtype())) {
                    COSDictionary annotationDict = annotation.getCOSObject();
                    // Fetch the popup before its entry is dropped below.
                    COSBase popup = annotationDict.getItem(POPUP);
                    annotationDict.removeItem(POPUP);
                    annotationDict.removeItem(COSName.CONTENTS); // plain text comment
                    annotationDict.removeItem(COSName.RC);       // rich text comment
                    annotationDict.removeItem(COSName.T);        // author

                    if (popup != null) {
                        objectsToRemove.add(popup);
                    }
                }
            }

            annotations.removeAll(objectsToRemove);
        }

        document.save(new File(RESULT_FOLDER, "only_fields-removeImproved.pdf"));
    }
}
 
Example 8
Source File: DashboardUtil.java    From Insights with Apache License 2.0 2 votes vote down vote up
/**
 * Returns the page count of the document minus one, i.e. the zero-based
 * index of its last page.
 *
 * @param document the document whose pages are counted
 * @return page count minus one; {@code -1} when the page count is zero
 */
private static int getPages(PDDocument document) {
	final int pageCount = document.getPages().getCount();
	return pageCount - 1;
}