Java Code Examples for org.apache.pdfbox.pdmodel.PDDocument#getNumberOfPages()

The following examples show how to use org.apache.pdfbox.pdmodel.PDDocument#getNumberOfPages(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PdfBoxConverter.java    From workable-converter with GNU General Public License v3.0 8 votes vote down vote up
/**
 * Renders every page of the source PDF at 300 DPI (RGB) and writes one PNG per page
 * into the destination folder, named {@code <source name>_<zero-based page index>.png}.
 *
 * @param require conversion request supplying the source file path and destination folder
 * @return always {@code true}; failures are reported through the exception
 * @throws ConvertFailedException if loading, rendering or writing fails for any reason
 */
@Override
public boolean byFileFolder(ConvertRequire require) throws ConvertFailedException {
    logger.info("pdfbox convert by file folder begin(src file must be a pdf file) :");
    try {
        File srcFile = new File(require.getWaitingFilePath());
        // try-with-resources guarantees the document is closed even when rendering
        // or writing a page fails; previously it was only closed on success.
        try (PDDocument document = PDDocument.load(srcFile)) {
            PDFRenderer renderer = new PDFRenderer(document);
            int pageCount = document.getNumberOfPages();
            for (int page = 0; page < pageCount; page++) {
                BufferedImage image = renderer.renderImageWithDPI(page, 300, ImageType.RGB);
                String savename = require.getDestConvertedPath() + "/" + SuffixTool.deleteSuffix(srcFile.getName()) + "_" + page + ".png";
                ImageIOUtil.writeImage(image, savename, 300);
            }
        }
    } catch (Exception e) {
        // NOTE(review): only the message is propagated; the original cause and stack trace
        // are lost. Pass `e` as the cause if ConvertFailedException offers such a constructor.
        throw new ConvertFailedException(e.getMessage());
    }
    logger.info("pdf box convert by filepath success");
    return true;
}
 
Example 2
Source File: PdfBoxConverter.java    From workable-converter with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Converts a base64-encoded PDF into PNG pictures, one per page.
 * <p>
 * The PDF is first written to a temporary file, each page is rendered at 300 DPI (RGB)
 * to a PNG in the temporary folder, and the base64 form of every PNG is stored on the
 * request via {@code setDestBase64s}.
 * </p>
 *
 * @param require conversion request carrying the base64 source and receiving the results
 * @return always {@code true}; failures are reported through the exception
 * @throws ConvertFailedException if decoding, loading, rendering or writing fails
 */
@Override
public boolean byBase64 (ConvertRequire require) throws ConvertFailedException {
    logger.info("pdfbox convert by base64 begin(src file must be a pdf file):");
    try {
        String srcFileTmpName = StrRandomTool.getUuid(true) + ".pdf";
        Base64FileTool.saveBase64File(require.getSrcBase64(), params.getTmpPath() + "/" + srcFileTmpName);
        File srcFile = new File(params.getTmpPath() + "/" + srcFileTmpName);
        List<String> results = new LinkedList<>();
        // try-with-resources guarantees the document is closed even when a page fails
        // to render or write; previously it was only closed on success.
        try (PDDocument document = PDDocument.load(srcFile)) {
            PDFRenderer renderer = new PDFRenderer(document);
            int pageCount = document.getNumberOfPages();
            for (int page = 0; page < pageCount; page++) {
                BufferedImage image = renderer.renderImageWithDPI(page, 300, ImageType.RGB);
                String savePath = params.getTmpPath() + "/" + SuffixTool.deleteSuffix(srcFile.getName()) + "_" + page + ".png";
                ImageIOUtil.writeImage(image, savePath, 300);
                results.add(Base64FileTool.filePathToBase64(savePath));
            }
        }
        require.setDestBase64s(results);
        // NOTE(review): the temporary PDF and PNG files are not deleted here - confirm
        // that cleanup of the tmp path happens elsewhere.
    } catch (Exception e) {
        // NOTE(review): only the message is propagated; the original cause is lost.
        throw new ConvertFailedException(e.getMessage());
    }
    logger.info("pdf box convert by base64 success");
    return true;
}
 
Example 3
Source File: PdfBoxUtilities.java    From tess4j with Apache License 2.0 6 votes vote down vote up
/**
 * Counts the pages of a PDF file.
 *
 * @param inputPdfFile the PDF file to inspect
 * @return the page count, or {@code -1} when the file cannot be loaded
 */
public static int getPdfPageCount(File inputPdfFile) {
    int pageCount;
    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(inputPdfFile);
        pageCount = pdf.getNumberOfPages();
    } catch (IOException ioe) {
        logger.error("Error counting PDF pages => " + ioe);
        pageCount = -1;
    } finally {
        if (pdf != null) {
            try {
                pdf.close();
            } catch (Exception ignored) {
                // best-effort close: a failure here must not mask the result
            }
        }
    }
    return pageCount;
}
 
Example 4
Source File: PdfComparator.java    From pdfcompare with Apache License 2.0 6 votes vote down vote up
/**
 * Handles the pages one document has beyond the common page count: each such page is
 * rendered, a coloured band is painted along its top and left edges, and the page is
 * recorded in the compare result next to a blank counterpart.
 *
 * @param document     the document that has the extra pages
 * @param pdfRenderer  renderer for {@code document}
 * @param minPageCount index of the first extra page
 * @param color        value written into the marker pixels
 * @param expected     {@code true} to pass the rendered page before the blank one to
 *                     {@code addPage}, {@code false} to pass the blank one first
 * @throws IOException if rendering a page fails
 */
private void addExtraPages(final PDDocument document, final PDFRenderer pdfRenderer, final int minPageCount,
        final int color, final boolean expected) throws IOException {
    for (int extraPage = minPageCount; extraPage < document.getNumberOfPages(); extraPage++) {
        final ImageWithDimension image = renderPageAsImage(document, pdfRenderer, extraPage, environment);
        final DataBuffer pixels = image.bufferedImage.getRaster().getDataBuffer();
        final int width = image.bufferedImage.getWidth();
        final int height = image.bufferedImage.getHeight();

        // Top band: the first MARKER_WIDTH rows, addressed as one contiguous run.
        for (int offset = 0; offset < width * MARKER_WIDTH; offset++) {
            pixels.setElem(offset, color);
        }
        // Left band: the first MARKER_WIDTH elements of every row.
        for (int row = 0; row < height; row++) {
            final int rowStart = row * width;
            for (int col = 0; col < MARKER_WIDTH; col++) {
                pixels.setElem(rowStart + col, color);
            }
        }

        final PageDiffCalculator diffCalculator = new PageDiffCalculator(new PageArea(extraPage + 1));
        if (expected) {
            compareResult.addPage(diffCalculator, extraPage, image, blank(image), image);
        } else {
            compareResult.addPage(diffCalculator, extraPage, blank(image), image, image);
        }
    }
}
 
Example 5
Source File: PdfComparator.java    From pdfcompare with Apache License 2.0 6 votes vote down vote up
/**
 * Compares two documents page by page.
 * <p>
 * Pages present in both documents are handed to {@code drawImage}; pages that only one
 * document has are handled afterwards by {@code addExtraPages}.
 * </p>
 *
 * @param expectedDocument the reference document
 * @param actualDocument   the document under test
 * @throws IOException declared for failures of the PDF operations invoked here
 */
private void compare(final PDDocument expectedDocument, final PDDocument actualDocument) throws IOException {
    // Each document gets its own resource cache instance before a renderer is created for it.
    expectedDocument.setResourceCache(new ResourceCacheWithLimitedImages(environment));
    PDFRenderer expectedPdfRenderer = new PDFRenderer(expectedDocument);

    actualDocument.setResourceCache(new ResourceCacheWithLimitedImages(environment));
    PDFRenderer actualPdfRenderer = new PDFRenderer(actualDocument);

    // Only pages that exist in both documents can be compared against each other.
    final int minPageCount = Math.min(expectedDocument.getNumberOfPages(), actualDocument.getNumberOfPages());
    // One latch count per common page.
    // NOTE(review): presumably drawImage counts the latch down when its page is done - confirm.
    CountDownLatch latch = new CountDownLatch(minPageCount);
    for (int pageIndex = 0; pageIndex < minPageCount; pageIndex++) {
        drawImage(latch, pageIndex, expectedDocument, actualDocument, expectedPdfRenderer, actualPdfRenderer);
    }
    // Wait on the latch, then shut the three executors down in this fixed order.
    Utilities.await(latch, "FullCompare", environment);
    Utilities.shutdownAndAwaitTermination(drawExecutor, "Draw");
    Utilities.shutdownAndAwaitTermination(parrallelDrawExecutor, "Parallel Draw");
    Utilities.shutdownAndAwaitTermination(diffExecutor, "Diff");
    // At most one document can have more pages than the common count.
    if (expectedDocument.getNumberOfPages() > minPageCount) {
        addExtraPages(expectedDocument, expectedPdfRenderer, minPageCount, environment.getActualColor().getRGB(), true);
    } else if (actualDocument.getNumberOfPages() > minPageCount) {
        addExtraPages(actualDocument, actualPdfRenderer, minPageCount, environment.getExpectedColor().getRGB(), false);
    }
}
 
Example 6
Source File: ExtractText.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/45895768/pdfbox-2-0-7-extracttext-not-working-but-1-8-13-does-and-pdfreader-as-well">
 * PDFBox 2.0.7 ExtractText not working but 1.8.13 does and PDFReader as well
 * </a>
 * <br/>
 * <a href="https://wetransfer.com/downloads/214674449c23713ee481c5a8f529418320170827201941/b2bea6">
 * test-2.pdf
 * </a>
 * <p>
 * Due to the broken <b>ToUnicode</b> maps the output of immediate text
 * extraction from this document is unsatisfying, cf. {@link #testTest2()}.
 * It can be improved by removing these <b>ToUnicode</b> maps as this test
 * shows.
 * </p>
 * <p>
 * The test strips the maps from the resources of every page, extracts the text of the
 * whole document, prints it and writes it to {@code test-2_NoToUnicode.txt} in the
 * result folder.
 * </p>
 *
 * @throws IOException if the document cannot be loaded or the result cannot be written
 */
@Test
public void testNoToUnicodeTest2() throws IOException
{
    // The document is declared as a resource as well so that it is closed together
    // with the stream; previously only the stream was closed.
    try (   InputStream resource = getClass().getResourceAsStream("test-2.pdf");
            PDDocument document = Loader.loadPDF(resource)    )
    {
        for (int pageNr = 0; pageNr < document.getNumberOfPages(); pageNr++)
        {
            PDPage page = document.getPage(pageNr);
            PDResources resources = page.getResources();
            removeToUnicodeMaps(resources);
        }

        PDFTextStripper stripper = new PDFTextStripper();
        String text = stripper.getText(document);

        System.out.printf("\n*\n* test-2.pdf without ToUnicode\n*\n%s\n", text);
        Files.write(new File(RESULT_FOLDER, "test-2_NoToUnicode.txt").toPath(), Collections.singleton(text));
    }
}
 
Example 7
Source File: ImageExtractor.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts the image regions of every page of a PDF and stores each one as a PNG file
 * named {@code <base name>_<running number>.png} in the output directory.
 *
 * @param inFile the PDF file to process
 * @param dpi    resolution handed to the region extractor
 * @param outDir directory receiving the PNG files
 * @throws IOException if the PDF cannot be read or an image cannot be written
 */
static void processFile(File inFile, int dpi, String outDir) throws IOException
{
    // Derive the base name before opening the document. A name without any '.' used to
    // raise StringIndexOutOfBoundsException (substring(0, -1)) after the document had
    // been loaded but outside the try/finally, leaking it; such names are now used as is.
    String fileName = inFile.getName();
    int dotIndex = fileName.lastIndexOf(".");
    String baseName = dotIndex < 0 ? fileName : fileName.substring(0, dotIndex);

    try (PDDocument doc = PDDocument.load(inFile)) {
        RegionExtractor regionExt = new RegionExtractor(doc, dpi);
        int count = 1;
        for (int pageIndex = 0; pageIndex < doc.getNumberOfPages(); pageIndex++) {
            for (ImageOperator op : ImageExtractor.extract(doc.getPage(pageIndex))) {
                RenderedImage image = regionExt.extract(pageIndex, op.x, op.y, op.w, op.h);
                String outFileName = baseName + "_" + count + ".png";
                ImageIO.write(image, "png", new File(outDir, outFileName));
                System.out.println(outFileName + " is saved.");
                count++;
            }
        }
    }
}
 
Example 8
Source File: ConvertTest.java    From blog-codes with Apache License 2.0 6 votes vote down vote up
/**
 * Renders every page of a fixed sample PDF to a 300 DPI RGB PNG file and prints a
 * progress line every 100 pages.
 *
 * @param args unused
 * @throws InvalidPasswordException if the PDF is encrypted and cannot be opened
 * @throws IOException              if the PDF cannot be read or an image cannot be written
 */
public static void main(String[] args) throws InvalidPasswordException, IOException {
	// try-with-resources: the document was never closed before.
	try (PDDocument document = PDDocument.load(new File("/home/lili/data/testen.pdf"))) {
		PDFRenderer pdfRenderer = new PDFRenderer(document);
		int pageCount = document.getNumberOfPages();
		for (int page = 0; page < pageCount; ++page) {
			if (page > 0 && page % 100 == 0) {
				System.out.println("page: " + page);
			}
			// Alternative kept from the original as a note: pdfRenderer.renderImage(page, 2.0f)
			// renders by scale factor instead of DPI.
			BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);

			// suffix in filename will be used as the file format
			ImageIOUtil.writeImage(bim, "/home/lili/data/testen-" + (page + 1) + ".png", 300);
		}
	}
}
 
Example 9
Source File: WaterMarkConverter.java    From workable-converter with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Adds a watermark overlay, placed in the background, to the PDF read from the request's
 * source stream and saves the result to the request's destination stream.
 * <p>
 * A watermark page setting of {@code 0} applies the watermark to every page; any other
 * value is used as the single page number to watermark.
 * </p>
 *
 * @param require conversion request supplying the streams and the watermark settings
 * @return always {@code true}; failures are reported through the exception
 * @throws ConvertFailedException if reading, overlaying or saving fails with an I/O error
 */
@Override
public boolean byStream(ConvertRequire require) throws ConvertFailedException {
    // try-with-resources: the document was never closed before.
    try (PDDocument pdfFile = PDDocument.load(require.getSrcStream())) {
        HashMap<Integer, String> overlayGuide = new HashMap<>();

        String tmpName = this.getTmpName(require.getWaterMarkRequire());
        //0 means add watermark in all page
        if (require.getWaterMarkRequire().getWaterMarkPage() == 0) {
            int pageCount = pdfFile.getNumberOfPages();
            for (int i = 0; i < pageCount; i++) {
                // the overlay guide is keyed by one-based page number
                overlayGuide.put(i + 1, tmpName);
            }
        } else {
            overlayGuide.put(require.getWaterMarkRequire().getWaterMarkPage(), tmpName);
        }
        Overlay overlay = new Overlay();
        try {
            overlay.setInputPDF(pdfFile);
            overlay.setOverlayPosition(Overlay.Position.BACKGROUND);
            overlay.overlay(overlayGuide);
            pdfFile.save(require.getDestStream());
        } finally {
            // Releases the overlay documents opened by the Overlay; must run only
            // after the overlaid document has been saved.
            overlay.close();
        }
    } catch (IOException e) {
        // NOTE(review): only the message is propagated; the original cause is lost.
        throw new ConvertFailedException(e.getMessage());
    }
    return true;
}
 
Example 10
Source File: PDFPresentation.java    From Quelea with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Make the slides that go in this PDF, this is what takes time and should
 * only be done once.
 * <p>
 * One {@code PdfSlide} is created per page, numbered from 1.
 * </p>
 *
 * @return all the slides.
 * @throws IOException if the PDF cannot be loaded or the temporary directory cannot be created.
 */
private PdfSlide[] makeSlides() throws IOException {
    File pdf = new File(file);
    // try-with-resources: the document is now closed even when creating a slide fails.
    try (PDDocument document = PDDocument.load(pdf.getAbsoluteFile())) {
        // NOTE(review): this temporary directory is created but never used in this
        // method; kept to preserve the existing side effect - confirm whether it is
        // still needed.
        Path f = Files.createTempDirectory(null);
        f.toFile().deleteOnExit();
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        int totalPages = document.getNumberOfPages();
        ArrayList<PdfSlide> ret = new ArrayList<>(totalPages);
        for (int i = 0; i < totalPages; i++) {
            ret.add(new PdfSlide(i + 1, pdfRenderer));
        }
        // NOTE(review): the document is closed on return, so PdfSlide presumably renders
        // in its constructor rather than keeping the renderer for later - confirm.
        return ret.toArray(new PdfSlide[ret.size()]);
    }
}
 
Example 11
Source File: SurvivorSongbookParser.java    From Quelea with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Get all the songs in the PDF document.
 * <p>
 * Pages with blank text are skipped. The text of consecutive pages is collected until a
 * page is reached that neither contains "(1 of" nor is followed by a page containing
 * "(2 of"; the collected parts are then processed as one song.
 * </p>
 *
 * @param location    the PDF file to parse.
 * @param statusPanel not used by this implementation.
 * @return a list of all the songs, never null.
 * @throws IOException if something went wrong.
 */
@Override
public List<SongDisplayable> getSongs(File location, StatusPanel statusPanel) throws IOException {
    List<SongDisplayable> pdfSongs = new ArrayList<>();
    // try-with-resources: the document is now closed even when parsing a page fails.
    try (PDDocument document = PDDocument.load(location)) {
        PDFTextStripper stripper = new PDFTextStripper();
        List<String> songParts = new ArrayList<>();
        int pageCount = document.getNumberOfPages();
        for (int i = 0; i < pageCount; i++) {
            String pageText = getPageText(document, stripper, i);
            if (pageText.trim().isEmpty()) {
                continue;
            }
            songParts.add(pageText);
            boolean twoPart = pageText.contains("(1 of");
            if (i < pageCount - 1) { //This section in case the original (1 of x) is missed out
                String nextPageText = getPageText(document, stripper, i + 1);
                if (nextPageText.contains("(2 of")) {
                    twoPart = true;
                }
            }
            if (!twoPart) {
                SongDisplayable song = processSong(songParts.toArray(new String[songParts.size()]));
                if (song != null) {
                    pdfSongs.add(song);
                }
                songParts.clear();
            }
        }
    }
    // pdfSongs is always initialised above, so the former null check was dead code.
    return pdfSongs;
}
 
Example 12
Source File: PDFCreator.java    From Knowage-Server with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Writes a "Page X of Y" footer onto pages of the document, on top of a white filled
 * rectangle.
 * <p>
 * The first and/or last page are skipped when the page numbering settings exclude them;
 * Y is always the total page count of the document. Pages rotated by 90 or 270 degrees
 * get a different start position and the rotate flag is passed to the text writer.
 * </p>
 *
 * @param doc           document whose pages are numbered
 * @param font          font used for measuring and writing the footer
 * @param fontSize      font size of the footer
 * @param pageNumbering settings telling whether the first and last page are included
 * @throws IOException if measuring the text or writing to a page fails
 */
private static void writePageNumbering(PDDocument doc, PDFont font, float fontSize, PageNumbering pageNumbering) throws IOException {
	final int pageCount = doc.getNumberOfPages();
	final int endExclusive = pageNumbering.isLastIncluded() ? pageCount : pageCount - 1;
	final int firstIndex = pageNumbering.isFirstIncluded() ? 0 : 1;

	for (int idx = firstIndex; idx < endExclusive; idx++) {
		final String footer = "Page " + (idx + 1) + " of " + pageCount;
		final PDPage page = doc.getPage(idx);
		final PDRectangle mediaBox = page.getMediaBox();
		// font metrics are expressed in 1/1000 units of the font size
		final float textWidth = font.getStringWidth(footer) * fontSize / 1000f;
		final float textHeight = font.getFontDescriptor().getFontBoundingBox().getHeight() * fontSize / 1000f;

		final int rotation = page.getRotation();
		final boolean rotated = rotation == 90 || rotation == 270;

		final float startX;
		final float startY;
		if (rotated) {
			// for rotated pages the media box width and height swap roles
			startX = mediaBox.getWidth() / 2f;
			startY = mediaBox.getHeight() - textWidth;
		} else {
			startX = (mediaBox.getWidth() - textWidth - textHeight) / 2f;
			startY = textHeight;
		}

		// append the content to the existing stream
		try (PDPageContentStream contentStream = new PDPageContentStream(doc, page, AppendMode.APPEND, true, true)) {
			// white fill colour (255, 255, 255)
			contentStream.setNonStrokingColor(255, 255, 255);
			// Draw a white filled rectangle behind the footer text
			final java.awt.Rectangle background =
					new java.awt.Rectangle((int) startX, (int) startY - 3, (int) textWidth + 2, (int) textHeight);
			drawRect(contentStream, Color.WHITE, background, true);
			writeText(contentStream, new Color(4, 44, 86), font, fontSize, rotated, startX, startY, footer);
		}
	}
}
 
Example 13
Source File: WaterMarkConverter.java    From workable-converter with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Adds a watermark overlay, placed in the background, to a base64-encoded PDF and stores
 * the base64 form of the result on the request via {@code setDestBase64}.
 * <p>
 * A watermark page setting of {@code 0} applies the watermark to every page; any other
 * value is used as the single page number to watermark.
 * </p>
 *
 * @param require conversion request carrying the base64 source and the watermark settings
 * @return always {@code true}; failures are reported through the exception
 * @throws ConvertFailedException if reading, overlaying or saving fails with an I/O error
 */
@Override
public boolean byBase64(ConvertRequire require) throws ConvertFailedException {
    try {
        String tmpFileName = this.saveTmpFileByBase64(require.getSrcBase64(), "pdf");
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        // try-with-resources: the document was never closed before.
        try (PDDocument pdfFile = PDDocument.load(new File(tmpFileName))) {
            HashMap<Integer, String> overlayGuide = new HashMap<>();
            String tmpName = this.getTmpName(require.getWaterMarkRequire());
            //0 means add watermark in all page
            if (require.getWaterMarkRequire().getWaterMarkPage() == 0) {
                int pageCount = pdfFile.getNumberOfPages();
                for (int i = 0; i < pageCount; i++) {
                    // the overlay guide is keyed by one-based page number
                    overlayGuide.put(i + 1, tmpName);
                }
            } else {
                overlayGuide.put(require.getWaterMarkRequire().getWaterMarkPage(), tmpName);
            }
            Overlay overlay = new Overlay();
            try {
                overlay.setInputPDF(pdfFile);
                overlay.setOverlayPosition(Overlay.Position.BACKGROUND);
                overlay.overlay(overlayGuide);
                pdfFile.save(byteArrayOutputStream);
            } finally {
                // Releases the overlay documents opened by the Overlay; must run only
                // after the overlaid document has been saved.
                overlay.close();
            }
        }
        require.setDestBase64(Base64FileTool.ByteArrayToBase64(byteArrayOutputStream.toByteArray()));
        // NOTE(review): the temporary source file is not deleted here - confirm that
        // cleanup happens elsewhere.
    } catch (IOException e) {
        // NOTE(review): only the message is propagated; the original cause is lost.
        throw new ConvertFailedException(e.getMessage());
    }
    return true;
}
 
Example 14
Source File: PDF2ImageExample.java    From tutorials with MIT License 5 votes vote down vote up
/**
 * Renders every page of a PDF at 300 DPI (RGB) and writes each page to
 * {@code src/output/pdf-<one-based page number>.<extension>}.
 *
 * @param filename  path of the PDF file to read
 * @param extension image file extension; it also selects the image format
 * @throws IOException if the PDF cannot be read or an image cannot be written
 */
private static void generateImageFromPDF(String filename, String extension) throws IOException {
	// try-with-resources: the document is now closed even when rendering or writing fails.
	try (PDDocument document = PDDocument.load(new File(filename))) {
		PDFRenderer pdfRenderer = new PDFRenderer(document);
		int pageCount = document.getNumberOfPages();
		for (int page = 0; page < pageCount; ++page) {
			BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
			ImageIOUtil.writeImage(bim, String.format("src/output/pdf-%d.%s", page + 1, extension), 300);
		}
	}
}
 
Example 15
Source File: WaterMarkConverter.java    From workable-converter with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Adds a watermark overlay, placed in the background, to the PDF at the request's
 * waiting file path and saves the result to the request's result file path.
 * <p>
 * A watermark page setting of {@code 0} applies the watermark to every page; any other
 * value is used as the single page number to watermark.
 * </p>
 *
 * @param require conversion request supplying the file paths and the watermark settings
 * @return always {@code true}; failures are reported through the exception
 * @throws ConvertFailedException if reading, overlaying or saving fails with an I/O error
 */
@Override
public boolean byFilePath(ConvertRequire require) throws ConvertFailedException {
    // try-with-resources: the document was never closed before.
    try (PDDocument pdfFile = PDDocument.load(new File(require.getWaitingFilePath()))) {
        HashMap<Integer, String> overlayGuide = new HashMap<>();

        String tmpName = this.getTmpName(require.getWaterMarkRequire());

        //0 means add watermark in all page
        if (require.getWaterMarkRequire().getWaterMarkPage() == 0) {
            int pageCount = pdfFile.getNumberOfPages();
            for (int i = 0; i < pageCount; i++) {
                // the overlay guide is keyed by one-based page number
                overlayGuide.put(i + 1, tmpName);
            }
        } else {
            overlayGuide.put(require.getWaterMarkRequire().getWaterMarkPage(), tmpName);
        }
        Overlay overlay = new Overlay();
        try {
            overlay.setInputPDF(pdfFile);
            overlay.setOverlayPosition(Overlay.Position.BACKGROUND);
            overlay.overlay(overlayGuide);
            pdfFile.save(require.getResultFilePath());
        } finally {
            // Releases the overlay documents opened by the Overlay; must run only
            // after the overlaid document has been saved.
            overlay.close();
        }
    } catch ( IOException e) {
        // NOTE(review): only the message is propagated; the original cause is lost.
        throw new ConvertFailedException(e.getMessage());
    }

    return true;
}
 
Example 16
Source File: ConvertToImages.java    From blog-codes with Apache License 2.0 4 votes vote down vote up
/**
 * Renders every page of a fixed PDF book at 300 DPI (RGB) and writes each page to
 * {@code <dir>/<one-based page number>.png}, creating the output directory if needed.
 *
 * @param args unused
 * @throws InvalidPasswordException if the PDF is encrypted and cannot be opened
 * @throws IOException              if the PDF cannot be read or an image cannot be written
 */
public static void main(String[] args) throws InvalidPasswordException, IOException {
	// Selects the KCMS colour management module before any rendering takes place.
	System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider");

	String path="/home/lili/下载/books/汪曾祺全集1.pdf";
	String dir="/home/lili/data/wang/book1";
	new File(dir).mkdirs();
	// try-with-resources: the document is now closed even when rendering or writing fails.
	try (PDDocument document = PDDocument.load(new File(path))) {
		PDFRenderer pdfRenderer = new PDFRenderer(document);
		int pageCount = document.getNumberOfPages();
		for (int page = 0; page < pageCount; ++page) {
			BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
			ImageIOUtil.writeImage(bim, dir + "/" + (page + 1) + ".png", 300);
		}
	}
}
 
Example 17
Source File: TxtCreator.java    From pdf-converter with Apache License 2.0 4 votes vote down vote up
/**
 * Extracts the text of a PDF page by page and writes it to the output file.
 * <p>
 * A temporary scratch file is used while loading the document and is deleted afterwards.
 * I/O failures are logged as a warning and not propagated.
 * </p>
 *
 * @param pdf    the PDF file to read
 * @param output the text file to write
 */
public void process(File pdf, File output){
    try {
        File tmpfile = File.createTempFile(String.format("txttmp-%s", UUID.randomUUID().toString()), null);
        // Each resource is released in its own finally block, so a failure while
        // acquiring a later resource or while closing one of them no longer leaks the
        // others or leaves the temporary file behind.
        try {
            RandomAccessFile raf = new RandomAccessFile(tmpfile, "rw");
            try {
                PDDocument pdDoc = PDDocument.loadNonSeq(pdf, raf);
                try {
                    FileWriter writer = new FileWriter(output);
                    try {
                        PDFTextStripper stripper = new PDFTextStripper();
                        int numberOfPages = pdDoc.getNumberOfPages();

                        // the stripper's start and end pages are one-based
                        for (int j = 1; j < numberOfPages+1; j++) {
                            stripper.setStartPage(j);
                            stripper.setEndPage(j);
                            writer.write(stripper.getText(pdDoc));
                            writer.flush();
                        }
                    } finally {
                        writer.close();
                    }
                } finally {
                    pdDoc.close();
                }
            } finally {
                raf.close();
            }
        } finally {
            tmpfile.delete();
        }
    } catch (IOException ioe) {
        log.warn(String.format("Failed to create txt for file: %s", pdf.getName()), ioe);
    }
}
 
Example 18
Source File: PdfBoxUtilities.java    From tess4j with Apache License 2.0 4 votes vote down vote up
/**
 * Converts PDF to PNG format.
 * <p>
 * Every page is rendered at 300 DPI (RGB) into a new temporary directory as
 * {@code workingimageNNNN.png}. Errors while reading the PDF are logged and not
 * propagated; the images written before the error are still returned.
 * </p>
 *
 * @param inputPdfFile input file
 * @return an array of PNG images sorted by file name; empty when no image was produced
 * @throws java.io.IOException if the temporary directory cannot be created
 */
public static File[] convertPdf2Png(File inputPdfFile) throws IOException {
    Path path = Files.createTempDirectory("tessimages");
    File imageDir = path.toFile();

    PDDocument document = null;
    try {
        document = PDDocument.load(inputPdfFile);
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        for (int page = 0; page < document.getNumberOfPages(); ++page) {
            BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);

            // suffix in filename will be used as the file format
            String filename = String.format("workingimage%04d.png", page + 1);
            ImageIOUtil.writeImage(bim, new File(imageDir, filename).getAbsolutePath(), 300);
        }
    } catch (IOException ioe) {
        logger.error("Error extracting PDF Document => " + ioe);
    } finally {
        // File.list() returns null when the directory cannot be read; treat that like
        // an empty directory instead of failing with a NullPointerException.
        String[] entries = imageDir.list();
        if (entries == null || entries.length == 0) {
            imageDir.delete();
        }

        if (document != null) {
            try {
                document.close();
            } catch (Exception ignored) {
                // best-effort close: a failure here must not hide the conversion result
            }
        }
    }

    // find working files
    File[] workingFiles = imageDir.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return name.toLowerCase().matches("workingimage\\d{4}\\.png$");
        }
    });

    // listFiles() returns null when the directory no longer exists, which is the case
    // after the empty directory was deleted above; sorting null used to throw a
    // NullPointerException.
    if (workingFiles == null) {
        return new File[0];
    }

    Arrays.sort(workingFiles, new Comparator<File>() {
        @Override
        public int compare(File f1, File f2) {
            return f1.getName().compareTo(f2.getName());
        }
    });

    return workingFiles;
}
 
Example 19
Source File: PageExtractor.java    From gcs with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Creates a page extractor for the given document. The end page is initialised to the
 * document's page count.
 *
 * @param sourceDocument The document to split.
 */
public PageExtractor(PDDocument sourceDocument)
{
    this.sourceDocument = sourceDocument;
    this.endPage = this.sourceDocument.getNumberOfPages();
}
 
Example 20
Source File: ExtractText.java    From testarea-pdfbox2 with Apache License 2.0 4 votes vote down vote up
/**
 * Extracts the text inside a rectangular region of one page of a PDF file.
 * <p>
 * Only paths ending in ".pdf" are processed; for any other path, or when an I/O error
 * occurs while loading or extracting, a message is printed and an empty string (or the
 * text extracted so far) is returned.
 * </p>
 *
 * @see #testUiPathTutorial()
 * @author Venkatachalam Neelakantan
 *
 * @param pdfLocation path of the PDF file
 * @param pageNumber  page selector; note that the page at index {@code pageNumber + 1} is read
 * @param x           x coordinate of the region
 * @param y           y coordinate of the region
 * @param width       width of the region
 * @param height      height of the region
 * @return the text found in the region, or an empty string if nothing was extracted
 * @throws IOException if closing the document fails
 */
public String getTextUsingPositionsUsingPdf(String pdfLocation, int pageNumber, double x, double y, double width,
        double height) throws IOException {
    String extractedText = "";
    // PDDocument Creates an empty PDF document. You need to add at least
    // one page for the document to be valid.
    // Using load method we can load a PDF document
    PDDocument document = null;
    PDPage page = null;
    try {
        if (pdfLocation.endsWith(".pdf")) {
            document = Loader.loadPDF(new File(pdfLocation));
            int getDocumentPageCount = document.getNumberOfPages();
            System.out.println(getDocumentPageCount);

            // Get a specific page. The parameter of getPage is a page index which starts
            // with 0, so the first page has index 0.
            // NOTE(review): pageNumber + 1 looks off by one for a zero-based index, and
            // getPage(0) on a document without pages cannot succeed - left unchanged
            // because callers may rely on the current page selection; confirm.
            if (getDocumentPageCount > 0) {
                page = document.getPage(pageNumber + 1);
            } else if (getDocumentPageCount == 0) {
                page = document.getPage(0);
            }
            // To create a rectangle by passing the x axis, y axis, width and height
            Rectangle2D rect = new Rectangle2D.Double(x, y, width, height);
            String regionName = "region1";

            // Strip the text from PDF using PDFTextStripper Area with the
            // help of Rectangle and named need to given for the rectangle
            PDFTextStripperByArea stripper = new PDFTextStripperByArea();
            stripper.setSortByPosition(true);
            stripper.addRegion(regionName, rect);
            stripper.extractRegions(page);
            System.out.println("Region is " + stripper.getTextForRegion("region1"));
            extractedText = stripper.getTextForRegion("region1");
        } else {
            System.out.println("No data return");
        }
    } catch (IOException e) {
        System.out.println("The file  not found" + "");
    } finally {
        // The document is still null when the path is not a ".pdf" or loading failed;
        // closing unconditionally used to throw a NullPointerException in those cases.
        if (document != null) {
            document.close();
        }
    }
    // Return the extracted text and this can be used for assertion
    return extractedText;
}