Java Code Examples for org.apache.pdfbox.pdmodel.PDPage.getResources()

The following are Jave code examples for showing how to use getResources() of the org.apache.pdfbox.pdmodel.PDPage class. You can vote up the examples you like. Your votes will be used in our system to get more good examples.
Example 1
Project: testarea-pdfbox2   File: RenderType3Character.java   Source Code and License Vote up 9 votes
/**
 * <a href="http://stackoverflow.com/questions/42032729/render-type3-font-character-as-image-using-pdfbox">
 * Render Type3 font character as image using PDFBox
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/0B0f6X4SAMh2KRDJTbm4tb3E1a1U/view">
 * 4700198773.pdf
 * </a>
 * from
 * <a href="http://stackoverflow.com/questions/37754112/extract-text-with-custom-font-result-non-readble">
 * extract text with custom font result non readble
 * </a>
 * <p>
 * This test shows how one can render individual Type 3 font glyphs as bitmaps.
 * Unfortunately PDFBox out-of-the-box does not provide a class to render contents
 * of arbitrary XObjects, merely for rendering pages; thus, we simply create a page
 * with the glyph in question and render that page.   
 * </p>
 * <p>
 * As the OP did not provide a sample PDF, we simply use one from another
 * stackoverflow question. There obviously might remain issues with the
 * OP's files.
 * </p>
 */
@Test
public void testRenderSdnList() throws IOException
{
    try (   InputStream resource = getClass().getResourceAsStream("sdnlist.pdf"))
    {
        PDDocument document = PDDocument.load(resource);

        PDPage page = document.getPage(1);
        PDResources pageResources = page.getResources();
        COSName f1Name = COSName.getPDFName("R144");
        PDType3Font fontF1 = (PDType3Font) pageResources.getFont(f1Name);
        Map<String, Integer> f1NameToCode = fontF1.getEncoding().getNameToCodeMap();

        COSDictionary charProcsDictionary = fontF1.getCharProcs();
        for (COSName key : charProcsDictionary.keySet())
        {
            COSStream stream = (COSStream) charProcsDictionary.getDictionaryObject(key);
            PDType3CharProc charProc = new PDType3CharProc(fontF1, stream);
            PDRectangle bbox = charProc.getGlyphBBox();
            if (bbox == null)
                bbox = charProc.getBBox();
            Integer code = f1NameToCode.get(key.getName());

            if (code != null)
            {
                PDDocument charDocument = new PDDocument();
                PDPage charPage = new PDPage(bbox);
                charDocument.addPage(charPage);
                charPage.setResources(pageResources);
                PDPageContentStream charContentStream = new PDPageContentStream(charDocument, charPage);
                charContentStream.beginText();
                charContentStream.setFont(fontF1, bbox.getHeight());
                charContentStream.getOutput().write(String.format("<%2X> Tj\n", code).getBytes());
                charContentStream.endText();
                charContentStream.close();

                File result = new File(RESULT_FOLDER, String.format("sdnlist-%s-%s.png", key.getName(), code));
                PDFRenderer renderer = new PDFRenderer(charDocument);
                BufferedImage image = renderer.renderImageWithDPI(0, 96);
                ImageIO.write(image, "PNG", result);
                charDocument.save(new File(RESULT_FOLDER, String.format("sdnlist-%s-%s.pdf", key.getName(), code)));
                charDocument.close();
            }
        }
    }
}
 
Example 2
Project: testarea-pdfbox2   File: RenderType3Character.java   Source Code and License Vote up 8 votes
/**
 * <a href="http://stackoverflow.com/questions/42032729/render-type3-font-character-as-image-using-pdfbox">
 * Render Type3 font character as image using PDFBox
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/0B0f6X4SAMh2KRDJTbm4tb3E1a1U/view">
 * 4700198773.pdf
 * </a>
 * from
 * <a href="http://stackoverflow.com/questions/37754112/extract-text-with-custom-font-result-non-readble">
 * extract text with custom font result non readble
 * </a>
 * <p>
 * This test shows how one can render individual Type 3 font glyphs as bitmaps.
 * Unfortunately PDFBox out-of-the-box does not provide a class to render contents
 * of arbitrary XObjects, merely for rendering pages; thus, we simply create a page
 * with the glyph in question and render that page.   
 * </p>
 * <p>
 * As the OP did not provide a sample PDF, we simply use one from another
 * stackoverflow question. There obviously might remain issues with the
 * OP's files.
 * </p>
 */
@Test
public void testRender4700198773() throws IOException
{
    try (   InputStream resource = getClass().getResourceAsStream("4700198773.pdf"))
    {
        PDDocument document = PDDocument.load(resource);

        PDPage page = document.getPage(0);
        PDResources pageResources = page.getResources();
        COSName f1Name = COSName.getPDFName("F1");
        PDType3Font fontF1 = (PDType3Font) pageResources.getFont(f1Name);
        Map<String, Integer> f1NameToCode = fontF1.getEncoding().getNameToCodeMap();

        COSDictionary charProcsDictionary = fontF1.getCharProcs();
        for (COSName key : charProcsDictionary.keySet())
        {
            COSStream stream = (COSStream) charProcsDictionary.getDictionaryObject(key);
            PDType3CharProc charProc = new PDType3CharProc(fontF1, stream);
            PDRectangle bbox = charProc.getGlyphBBox();
            if (bbox == null)
                bbox = charProc.getBBox();
            Integer code = f1NameToCode.get(key.getName());

            if (code != null)
            {
                PDDocument charDocument = new PDDocument();
                PDPage charPage = new PDPage(bbox);
                charDocument.addPage(charPage);
                charPage.setResources(pageResources);
                PDPageContentStream charContentStream = new PDPageContentStream(charDocument, charPage);
                charContentStream.beginText();
                charContentStream.setFont(fontF1, bbox.getHeight());
                charContentStream.getOutput().write(String.format("<%2X> Tj\n", code).getBytes());
                charContentStream.endText();
                charContentStream.close();

                File result = new File(RESULT_FOLDER, String.format("4700198773-%s-%s.png", key.getName(), code));
                PDFRenderer renderer = new PDFRenderer(charDocument);
                BufferedImage image = renderer.renderImageWithDPI(0, 96);
                ImageIO.write(image, "PNG", result);
                charDocument.save(new File(RESULT_FOLDER, String.format("4700198773-%s-%s.pdf", key.getName(), code)));
                charDocument.close();
            }
        }
    }
}
 
Example 3
Project: IDBuilderFX   File: ExtractPhoto.java   Source Code and License Vote up 7 votes
public static void findPhoto(String path,int empId) throws IOException, SQLException, Error{
		// Loading an existing document
		int imageFound=0;
		File file = new File(path);
		PDDocument document=PDDocument.load(file);
		PDPageTree list=document.getPages();
		for(PDPage page:list){						//check in all pages of pdf
			PDResources pdResources=page.getResources();		//get all resources
			for(COSName cosName:pdResources.getXObjectNames())		//loop for all resources
			{
				PDXObject pdxObject=pdResources.getXObject(cosName);
				 if (pdxObject instanceof PDImageXObject) {			//check that the resource is image
		                BufferedImage br=((PDImageXObject) pdxObject).getImage();
		                RgbImage im = RgbImageJ2se.toRgbImage(br);
		                // step #3 - convert image to greyscale 8-bits
		                RgbAvgGray toGray = new RgbAvgGray();
		                toGray.push(im);
		                // step #4 - initialize face detector with correct Haar profile
		                InputStream is  = ExtractPhoto.class.getResourceAsStream("/haar/HCSB.txt");
		                Gray8DetectHaarMultiScale detectHaar = new Gray8DetectHaarMultiScale(is, 1,40);
		                // step #5 - apply face detector to grayscale image
		                List<Rect> result= detectHaar.pushAndReturn(toGray.getFront());
		                if(result.size()!=0)
		                {
		                database.StorePhoto.storePhoto(empId,br);
		                imageFound=1;
		                break;
		                }
				 }
			}
			if(imageFound==1)
				break;
}
		System.out.println(imageFound);
		if(imageFound!=1){
			BufferedImage in = ImageIO.read(ExtractPhoto.class.getResource("/images/nopic.jpg"));
            database.StorePhoto.storePhoto(empId,in);
		}
	document.close();	
	}
 
Example 4
Project: testarea-pdfbox2   File: ExtractText.java   Source Code and License Vote up 7 votes
/**
 * <a href="https://stackoverflow.com/questions/45895768/pdfbox-2-0-7-extracttext-not-working-but-1-8-13-does-and-pdfreader-as-well">
 * PDFBox 2.0.7 ExtractText not working but 1.8.13 does and PDFReader as well
 * </a>
 * <br/>
 * <a href="https://wetransfer.com/downloads/214674449c23713ee481c5a8f529418320170827201941/b2bea6">
 * test-2.pdf
 * </a>
 * <p>
 * Due to the broken <b>ToUnicode</b> maps the output of immediate text
 * extraction from this document is unsatisfying, cf. {@link #testTest2()}.
 * It can be improved by removing these <b>ToUnicode</b> maps as this test
 * shows.
 * </p>
 */
@Test
public void testNoToUnicodeTest2() throws IOException
{
    try (   InputStream resource = getClass().getResourceAsStream("test-2.pdf")    )
    {
        PDDocument document = PDDocument.load(resource);

        for (int pageNr = 0; pageNr < document.getNumberOfPages(); pageNr++)
        {
            PDPage page = document.getPage(pageNr);
            PDResources resources = page.getResources();
            removeToUnicodeMaps(resources);
        }

        PDFTextStripper stripper = new PDFTextStripper();
        String text = stripper.getText(document);

        System.out.printf("\n*\n* test-2.pdf without ToUnicode\n*\n%s\n", text);
        Files.write(new File(RESULT_FOLDER, "test-2_NoToUnicode.txt").toPath(), Collections.singleton(text));
    }
}
 
Example 5
Project: memoria-politica   File: FedDepPhotosUtility.java   Source Code and License Vote up 6 votes
private static void buildImgKeyToImageObjMapping(PDPage page) throws IOException {
    PDResources resources = page.getResources();
    Map images = resources.getImages();

    if(images != null) {
        Iterator imageIter = images.keySet().iterator();

        while(imageIter.hasNext()) {
            String key = (String)imageIter.next();
            PDXObjectImage image = (PDXObjectImage)images.get(key);
            mapImgKeyToImageObj.put(key, image);
        }
    }
}
 
Example 6
Project: testarea-pdfbox2   File: ExtractImages.java   Source Code and License Vote up 6 votes
/**
 * <a href="http://stackoverflow.com/questions/40531871/how-can-i-check-if-pdf-page-is-imagescanned-by-pdfbox-xpdf">
 * How can I check if PDF page is image(scanned) by PDFBOX, XPDF
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/0B9izTHWJQ7xlT2ZoQkJfbGRYcFE">
 * 10948.pdf
 * </a>
 * <p>
 * The only special thing about the two images returned for the sample PDF is that
 * one image is merely a mask used for the other image, and the other image is the
 * actual image used on the PDF page. If one only wants the images immediately used
 * in the page content, one also has to scan the page content.
 * </p>
 */
@Test
public void testExtractPageImageResources10948() throws IOException
{
    try (   InputStream resource = getClass().getResourceAsStream("10948.pdf"))
    {
        PDDocument document = PDDocument.load(resource);
        int page = 1;
        for (PDPage pdPage : document.getPages())
        {
            PDResources resources = pdPage.getResources();
            if (resource != null)
            {
                int index = 0;
                for (COSName cosName : resources.getXObjectNames())
                {
                    PDXObject xobject = resources.getXObject(cosName);
                    if (xobject instanceof PDImageXObject)
                    {
                        PDImageXObject image = (PDImageXObject)xobject;
                        File file = new File(RESULT_FOLDER, String.format("10948-%s-%s.%s", page, index, image.getSuffix()));
                        ImageIO.write(image.getImage(), image.getSuffix(), file);
                        index++;
                    }
                }
            }
            page++;
        }
    }
}
 
Example 7
Project: testarea-pdfbox2   File: ExtractImages.java   Source Code and License Vote up 6 votes
/**
 * <a href="http://stackoverflow.com/questions/40531871/how-can-i-check-if-pdf-page-is-imagescanned-by-pdfbox-xpdf">
 * How can I check if PDF page is image(scanned) by PDFBOX, XPDF
 * </a>
 * <br/>
 * <a href="https://drive.google.com/open?id=0B9izTHWJQ7xlYi1XN1BxMmZEUGc">
 * 10948.pdf
 * </a>, renamed "10948-new.pdf" here to prevent a collision
 * <p>
 * Here the code extracts no image at all because the images are not immediate page
 * resources but wrapped in form xobjects.
 * </p>
 */
@Test
public void testExtractPageImageResources10948New() throws IOException
{
    try (   InputStream resource = getClass().getResourceAsStream("10948-new.pdf"))
    {
        PDDocument document = PDDocument.load(resource);
        int page = 1;
        for (PDPage pdPage : document.getPages())
        {
            PDResources resources = pdPage.getResources();
            if (resource != null)
            {
                int index = 0;
                for (COSName cosName : resources.getXObjectNames())
                {
                    PDXObject xobject = resources.getXObject(cosName);
                    if (xobject instanceof PDImageXObject)
                    {
                        PDImageXObject image = (PDImageXObject)xobject;
                        File file = new File(RESULT_FOLDER, String.format("10948-new-%s-%s.%s", page, index, image.getSuffix()));
                        ImageIO.write(image.getImage(), image.getSuffix(), file);
                        index++;
                    }
                }
            }
            page++;
        }
    }
}