org.apache.pdfbox.pdmodel.graphics.PDXObject Java Examples

The following examples show how to use org.apache.pdfbox.pdmodel.graphics.PDXObject. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PdfContentImagePreprocessor.java    From tika-server with Apache License 2.0 7 votes vote down vote up
/**
 * Walks the XObjects of the given resources, descending into form XObjects, and
 * replaces every image whose color model has an alpha channel with the object
 * returned by {@code makeImageObjectCopy}. Sets {@code imagesWereChanged} once at
 * least one image has been replaced.
 *
 * @param resources the resources to scan; {@code null} is tolerated and ignored
 * @throws IOException if an XObject or its image data cannot be read
 */
private void processImagesFromResources(PDResources resources) throws IOException {
    if (resources == null) {
        // getResources() of a form XObject may yield null; nothing to scan then
        return;
    }

    for (COSName xObjectName : resources.getXObjectNames()) {
        PDXObject xObject = resources.getXObject(xObjectName);

        if (xObject instanceof PDFormXObject) {
            processImagesFromResources(((PDFormXObject) xObject).getResources());
        } else if (xObject instanceof PDImageXObject) {
            PDImageXObject img = (PDImageXObject) xObject;
            if (!img.getImage().getColorModel().hasAlpha()) {
                // skip only this image; the remaining XObjects still have to be inspected
                continue;
            }

            PDImageXObject cpy = makeImageObjectCopy(img);
            resources.put(xObjectName, cpy);
            imagesWereChanged = true;
        }
    }
}
 
Example #2
Source File: DefaultResourceCache.java    From gcs with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Looks up the cached XObject for the given indirect object.
 *
 * @param indirect the indirect object used as the cache key
 * @return the cached XObject, or {@code null} when there is no entry or the
 *         soft reference no longer holds a value
 * @throws IOException declared by the overridden method; not thrown by this body
 */
@Override
public PDXObject getXObject(COSObject indirect) throws IOException
{
    SoftReference<PDXObject> reference = xobjects.get(indirect);
    return reference == null ? null : reference.get();
}
 
Example #3
Source File: PDFBoxTree.java    From Pdf2Dom with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Handles an image drawing operation: resolves the XObject named by the first
 * argument and, if it is an image, rotates it, wraps it in an
 * {@code ImageResource} and passes it to {@code renderImage} together with the
 * position computed by {@code calculateImagePosition}.
 *
 * @param arguments the operator arguments; the first one is cast to {@code COSName}
 * @throws IOException if the XObject or its image data cannot be read
 */
protected void processImageOperation(List<COSBase> arguments) throws IOException
{
    COSName imageName = (COSName) arguments.get(0);
    PDXObject candidate = getResources().getXObject(imageName);
    if (!(candidate instanceof PDImageXObject))
    {
        // only image XObjects are rendered here
        return;
    }

    PDImageXObject image = (PDImageXObject) candidate;
    BufferedImage rotated = rotateImage(image.getImage());
    ImageResource imageData = new ImageResource(getTitle(), rotated);

    Rectangle2D position = calculateImagePosition(image);
    renderImage((float) position.getX(), (float) position.getY(),
            (float) position.getWidth(), (float) position.getHeight(), imageData);
}
 
Example #4
Source File: PdfFontExtractor.java    From FontVerter with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Passes every font of the given resources to {@code extractStrategy} and then
 * repeats the same for the resources of each form XObject found.
 *
 * @param resources the resources to scan
 * @throws IOException if a font or XObject cannot be read
 */
private void extractFontResources(PDResources resources) throws IOException {
    for (COSName fontName : resources.getFontNames()) {
        extractStrategy.extract(resources.getFont(fontName));
    }

    for (COSName xObjectName : resources.getXObjectNames()) {
        PDXObject candidate = resources.getXObject(xObjectName);
        if (!(candidate instanceof PDFormXObject)) {
            continue;
        }

        PDResources nested = ((PDFormXObject) candidate).getResources();
        if (nested != null) {
            extractFontResources(nested);
        }
    }
}
 
Example #5
Source File: DrawObject.java    From gcs with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Processes the operator: resolves the XObject named by the first operand,
 * reports it to the {@code PDFMarkedContentExtractor} context and then shows it
 * if it is a transparency group or a form.
 *
 * @param operator the operator being processed
 * @param arguments the operands; a first operand that is not a name is ignored
 * @throws IOException if the XObject cannot be read or shown
 */
@Override
public void process(Operator operator, List<COSBase> arguments) throws IOException
{
    if (arguments.isEmpty())
    {
        throw new MissingOperandException(operator, arguments);
    }

    COSBase firstOperand = arguments.get(0);
    if (firstOperand instanceof COSName)
    {
        PDXObject target = context.getResources().getXObject((COSName) firstOperand);
        ((PDFMarkedContentExtractor) context).xobject(target);

        // transparency groups are tested first, before the general form case
        if (target instanceof PDTransparencyGroup)
        {
            context.showTransparencyGroup((PDTransparencyGroup) target);
        }
        else if (target instanceof PDFormXObject)
        {
            context.showForm((PDFormXObject) target);
        }
    }
}
 
Example #6
Source File: PDSoftMask.java    From gcs with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Returns the transparency group stored under the G entry of the soft mask.
 * The value is resolved on first use and kept in the {@code group} field.
 *
 * @return the transparency group, or {@code null} if the G entry is missing or
 *         is not a transparency group
 * @throws IOException if the XObject cannot be created from the G entry
 */
public PDTransparencyGroup getGroup() throws IOException
{
    if (group != null)
    {
        return group;
    }

    COSBase groupEntry = getCOSObject().getDictionaryObject(COSName.G);
    if (groupEntry != null)
    {
        PDXObject candidate = PDXObject.createXObject(groupEntry, null);
        if (candidate instanceof PDTransparencyGroup)
        {
            group = (PDTransparencyGroup) candidate;
        }
    }
    return group;
}
 
Example #7
Source File: PDResources.java    From gcs with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Returns the XObject resource with the given name, or null if none exists.
 * <p>
 * If a cache is present and the resource is an indirect object, the cache is
 * consulted first and a freshly created XObject is stored in it afterwards,
 * provided {@code isAllowedCache} permits it.
 *
 * @param name Name of the XObject resource.
 *
 * @return the XObject resource of the given name.
 *
 * @throws IOException if something went wrong.
 */
public PDXObject getXObject(COSName name) throws IOException
{
    COSObject indirect = getIndirect(COSName.XOBJECT, name);
    if (cache != null && indirect != null)
    {
        PDXObject cached = cache.getXObject(indirect);
        if (cached != null)
        {
            return cached;
        }
    }

    // get the instance
    PDXObject xobject;
    COSBase value = get(COSName.XOBJECT, name);
    if (value == null)
    {
        xobject = null;
    }
    else if (value instanceof COSObject)
    {
        xobject = PDXObject.createXObject(((COSObject) value).getObject(), this);
    }
    else
    {
        xobject = PDXObject.createXObject(value, this);
    }
    // only indirect objects are cacheable: the lookup above is skipped for a null
    // key, so storing under null could never be read back
    if (cache != null && indirect != null && isAllowedCache(xobject))
    {
        cache.put(indirect, xobject);
    }
    return xobject;
}
 
Example #8
Source File: DrawObject.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Processes the operator: resolves the XObject named by the first operand and
 * draws it as an image, transparency group or form, depending on its type.
 *
 * @param operator the operator being processed
 * @param operands the operands; a first operand that is not a name is ignored
 * @throws IOException if the XObject cannot be read or drawn
 */
@Override
public void process(Operator operator, List<COSBase> operands) throws IOException
{
    if (operands.isEmpty())
    {
        throw new MissingOperandException(operator, operands);
    }

    COSBase firstOperand = operands.get(0);
    if (!(firstOperand instanceof COSName))
    {
        return;
    }

    COSName xobjectName = (COSName) firstOperand;
    PDXObject target = context.getResources().getXObject(xobjectName);
    if (target == null)
    {
        throw new MissingResourceException("Missing XObject: " + xobjectName.getName());
    }

    if (target instanceof PDImageXObject)
    {
        context.drawImage((PDImageXObject) target);
        return;
    }
    // transparency groups are tested before the general form case
    if (target instanceof PDTransparencyGroup)
    {
        getContext().showTransparencyGroup((PDTransparencyGroup) target);
        return;
    }
    if (target instanceof PDFormXObject)
    {
        getContext().showForm((PDFormXObject) target);
    }
}
 
Example #9
Source File: PdfContentTypeChecker.java    From tika-server with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the image XObjects reachable from the given resources, descending into
 * form XObjects, by incrementing {@code imagesCount} once per image found.
 *
 * @param resources the resources to scan; {@code null} is tolerated and ignored
 * @throws IOException if an XObject cannot be read
 */
private void getImagesFromResources(PDResources resources) throws IOException {
    if (resources == null) {
        // getResources() of a form XObject may yield null; nothing to count then
        return;
    }

    for (COSName xObjectName : resources.getXObjectNames()) {
        PDXObject xObject = resources.getXObject(xObjectName);

        if (xObject instanceof PDFormXObject) {
            getImagesFromResources(((PDFormXObject) xObject).getResources());
        } else if (xObject instanceof PDImageXObject) {
            // the image is only counted, never decoded
            imagesCount++;
        }
    }
}
 
Example #10
Source File: PDFBoxTree.java    From Pdf2Dom with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Adds the fonts of the given resources to the font table and then repeats the
 * same for the resources of each form XObject found. TrueType fonts, Type0 fonts
 * with a {@code PDCIDFontType2} descendant and Type1C fonts are added; any other
 * font is skipped with a warning.
 *
 * @param resources the resources to scan
 * @param table the font table receiving the accepted fonts
 * @throws IOException if a font or XObject cannot be read
 */
private void processFontResources(PDResources resources, FontTable table) throws IOException
{
    final String unsupportedTemplate = "Font: {} skipped because type '{}' is not supported.";

    for (COSName fontKey : resources.getFontNames())
    {
        PDFont candidate = resources.getFont(fontKey);
        if (candidate instanceof PDTrueTypeFont)
        {
            table.addEntry(candidate);
            log.debug("Font: " + candidate.getName() + " TTF");
        }
        else if (candidate instanceof PDType0Font)
        {
            PDCIDFont descendant = ((PDType0Font) candidate).getDescendantFont();
            if (descendant instanceof PDCIDFontType2)
            {
                table.addEntry(candidate);
            }
            else
            {
                log.warn(unsupportedTemplate, candidate.getName(), candidate.getClass().getSimpleName());
            }
        }
        else if (candidate instanceof PDType1CFont)
        {
            table.addEntry(candidate);
        }
        else
        {
            log.warn(unsupportedTemplate, candidate.getName(), candidate.getClass().getSimpleName());
        }
    }

    for (COSName xObjectName : resources.getXObjectNames())
    {
        PDXObject candidate = resources.getXObject(xObjectName);
        if (!(candidate instanceof PDFormXObject))
        {
            continue;
        }

        PDResources nested = ((PDFormXObject) candidate).getResources();
        // do not recurse into a form that refers back to the resources being processed
        boolean distinctResources = nested != null
                && nested != resources
                && nested.getCOSObject() != resources.getCOSObject();
        if (distinctResources)
        {
            processFontResources(nested, table);
        }
    }
}
 
Example #11
Source File: ExtractImages.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * <a href="http://stackoverflow.com/questions/40531871/how-can-i-check-if-pdf-page-is-imagescanned-by-pdfbox-xpdf">
 * How can I check if PDF page is image(scanned) by PDFBOX, XPDF
 * </a>
 * <br/>
 * <a href="https://drive.google.com/open?id=0B9izTHWJQ7xlYi1XN1BxMmZEUGc">
 * 10948.pdf
 * </a>, renamed "10948-new.pdf" here to prevent a collision
 * <p>
 * Writes every image XObject that is an immediate page resource to
 * {@code RESULT_FOLDER}.
 * Here the code extracts no image at all because the images are not immediate page
 * resources but wrapped in form xobjects.
 * </p>
 *
 * @throws IOException if the document cannot be loaded or an image cannot be written
 */
@Test
public void testExtractPageImageResources10948New() throws IOException
{
    // the document is closed together with the stream once the test is done
    try (   InputStream resource = getClass().getResourceAsStream("10948-new.pdf");
            PDDocument document = Loader.loadPDF(resource))
    {
        int page = 1;
        for (PDPage pdPage : document.getPages())
        {
            PDResources resources = pdPage.getResources();
            // check the page resources, not the input stream
            if (resources != null)
            {
                int index = 0;
                for (COSName cosName : resources.getXObjectNames())
                {
                    PDXObject xobject = resources.getXObject(cosName);
                    if (xobject instanceof PDImageXObject)
                    {
                        PDImageXObject image = (PDImageXObject)xobject;
                        File file = new File(RESULT_FOLDER, String.format("10948-new-%s-%s.%s", page, index, image.getSuffix()));
                        ImageIO.write(image.getImage(), image.getSuffix(), file);
                        index++;
                    }
                }
            }
            page++;
        }
    }
}
 
Example #12
Source File: ExtractImages.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * <a href="http://stackoverflow.com/questions/40531871/how-can-i-check-if-pdf-page-is-imagescanned-by-pdfbox-xpdf">
 * How can I check if PDF page is image(scanned) by PDFBOX, XPDF
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/0B9izTHWJQ7xlT2ZoQkJfbGRYcFE">
 * 10948.pdf
 * </a>
 * <p>
 * Writes every image XObject that is an immediate page resource to
 * {@code RESULT_FOLDER}.
 * The only special thing about the two images returned for the sample PDF is that
 * one image is merely a mask used for the other image, and the other image is the
 * actual image used on the PDF page. If one only wants the images immediately used
 * in the page content, one also has to scan the page content.
 * </p>
 *
 * @throws IOException if the document cannot be loaded or an image cannot be written
 */
@Test
public void testExtractPageImageResources10948() throws IOException
{
    // the document is closed together with the stream once the test is done
    try (   InputStream resource = getClass().getResourceAsStream("10948.pdf");
            PDDocument document = Loader.loadPDF(resource))
    {
        int page = 1;
        for (PDPage pdPage : document.getPages())
        {
            PDResources resources = pdPage.getResources();
            // check the page resources, not the input stream
            if (resources != null)
            {
                int index = 0;
                for (COSName cosName : resources.getXObjectNames())
                {
                    PDXObject xobject = resources.getXObject(cosName);
                    if (xobject instanceof PDImageXObject)
                    {
                        PDImageXObject image = (PDImageXObject)xobject;
                        File file = new File(RESULT_FOLDER, String.format("10948-%s-%s.%s", page, index, image.getSuffix()));
                        ImageIO.write(image.getImage(), image.getSuffix(), file);
                        index++;
                    }
                }
            }
            page++;
        }
    }
}
 
Example #13
Source File: ExtractText.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Removes the ToUnicode entry from every font dictionary in the given resources
 * and then repeats the same for the resources of each form XObject found.
 *
 * @param pdResources the resources to process; {@code null} is tolerated and ignored
 * @throws IOException if an XObject cannot be read
 */
void removeToUnicodeMaps(PDResources pdResources) throws IOException
{
    if (pdResources == null)
    {
        // getResources() of a form XObject may yield null; nothing to strip then
        return;
    }

    COSDictionary resources = pdResources.getCOSObject();

    COSDictionary fonts = asDictionary(resources, COSName.FONT);
    if (fonts != null)
    {
        for (COSBase object : fonts.getValues())
        {
            // follow indirect references until the actual object is reached
            while (object instanceof COSObject)
            {
                object = ((COSObject)object).getObject();
            }
            if (object instanceof COSDictionary)
            {
                COSDictionary font = (COSDictionary)object;
                font.removeItem(COSName.TO_UNICODE);
            }
        }
    }

    for (COSName name : pdResources.getXObjectNames())
    {
        PDXObject xobject = pdResources.getXObject(name);
        if (xobject instanceof PDFormXObject)
        {
            PDResources xobjectPdResources = ((PDFormXObject)xobject).getResources();
            removeToUnicodeMaps(xobjectPdResources);
        }
    }
}
 
Example #14
Source File: ImageExtractor.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Intercepts the {@code Do} operator. For an image XObject an
 * {@code ImageOperator} with the image rectangle is added to {@code buffer};
 * a form XObject is shown via {@code showForm}. Every other operator is
 * delegated to the superclass.
 *
 * @param operator the operator being processed
 * @param operands the operands; for {@code Do} the first one is cast to {@code COSName}
 * @throws IOException if the XObject cannot be read or the operator cannot be processed
 */
@Override protected void processOperator(Operator operator, List<COSBase> operands)
    throws IOException
{
    if (!"Do".equals(operator.getName())) {
        super.processOperator(operator, operands);
        return;
    }

    COSName xobjectName = (COSName) operands.get(0);
    PDXObject target = getResources().getXObject(xobjectName);

    if (target instanceof PDImageXObject) {
        Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
        PDRectangle cropBox = getCurrentPage().getCropBox();
        float width = ctm.getScalingFactorX();
        float height = ctm.getScalingFactorY();
        float left = ctm.getTranslateX();
        // y is measured from the top of the crop box instead of from its bottom
        float top = cropBox.getHeight() - ctm.getTranslateY() - height;
        buffer.add(new ImageOperator(left, top, width, height));
    }
    else if (target instanceof PDFormXObject) {
        showForm((PDFormXObject) target);
    }
}
 
Example #15
Source File: ResourceCacheWithLimitedImages.java    From pdfcompare with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the XObject in the cache behind a soft reference, unless it is too
 * large. An XObject is skipped when its stream length exceeds
 * {@code environment.getMaxImageSize()} or, for images, when width times
 * height exceeds that same limit.
 *
 * @param indirect the indirect object used as the cache key
 * @param xobject the XObject to cache
 * @throws IOException declared by the overridden method
 */
@Override
public void put(COSObject indirect, PDXObject xobject) throws IOException {
    final int length = xobject.getStream().getLength();
    if (length > environment.getMaxImageSize()) {
        LOG.trace("Not caching image with Size: {}", length);
        return;
    }
    if (xobject instanceof PDImageXObject) {
        PDImageXObject imageObj = (PDImageXObject) xobject;
        // widen before multiplying: an int product can overflow for very large
        // images and wrap to a small or negative value that slips past the limit
        final long pixelCount = (long) imageObj.getWidth() * imageObj.getHeight();
        if (pixelCount > environment.getMaxImageSize()) {
            return;
        }
    }
    this.xobjects.put(indirect, new SoftReference<>(xobject));
}
 
Example #16
Source File: ResourceCacheWithLimitedImages.java    From pdfcompare with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the cached XObject for the given indirect object.
 *
 * @param indirect the indirect object used as the cache key
 * @return the cached XObject, or {@code null} when there is no entry or the
 *         soft reference no longer holds a value
 * @throws IOException declared by the overridden method; not thrown by this body
 */
@Override
public PDXObject getXObject(COSObject indirect) throws IOException {
    final SoftReference<PDXObject> reference = this.xobjects.get(indirect);
    return reference == null ? null : reference.get();
}
 
Example #17
Source File: DrawObject.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Processes the operator: resolves the XObject named by the first operand and
 * shows it if it is a transparency group or a form. Image XObjects are skipped
 * without being loaded.
 *
 * @param operator the operator being processed
 * @param arguments the operands; a first operand that is not a name is ignored
 * @throws IOException if the XObject cannot be read or shown
 */
@Override
public void process(Operator operator, List<COSBase> arguments) throws IOException
{
    if (arguments.isEmpty())
    {
        throw new MissingOperandException(operator, arguments);
    }

    COSBase firstOperand = arguments.get(0);
    if (!(firstOperand instanceof COSName))
    {
        return;
    }

    COSName xobjectName = (COSName) firstOperand;
    if (context.getResources().isImageXObject(xobjectName))
    {
        // we're done here, don't decode images when doing text extraction
        return;
    }

    PDXObject target = context.getResources().getXObject(xobjectName);
    // transparency groups are tested before the general form case
    if (target instanceof PDTransparencyGroup)
    {
        context.showTransparencyGroup((PDTransparencyGroup) target);
    }
    else if (target instanceof PDFormXObject)
    {
        context.showForm((PDFormXObject) target);
    }
}
 
Example #18
Source File: PDResources.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Decides whether the given XObject may be put into the resource cache.
 * Anything that is not an image is allowed. An image is refused when its color
 * space is given by name and that name is either a device color space for which
 * a default color space is present, or itself present as a color space.
 *
 * @param xobject the XObject to check
 * @return {@code false} if the XObject must not be cached, {@code true} otherwise
 */
private boolean isAllowedCache(PDXObject xobject)
{
    if (!(xobject instanceof PDImageXObject))
    {
        return true;
    }

    COSBase colorSpaceEntry = xobject.getCOSObject().getDictionaryObject(COSName.COLORSPACE);
    if (!(colorSpaceEntry instanceof COSName))
    {
        return true;
    }

    // don't cache if it might use page resources, see PDFBOX-2370 and PDFBOX-3484
    COSName csName = (COSName) colorSpaceEntry;
    boolean mayUsePageResources =
            (csName.equals(COSName.DEVICECMYK) && hasColorSpace(COSName.DEFAULT_CMYK))
            || (csName.equals(COSName.DEVICERGB) && hasColorSpace(COSName.DEFAULT_RGB))
            || (csName.equals(COSName.DEVICEGRAY) && hasColorSpace(COSName.DEFAULT_GRAY))
            || hasColorSpace(csName);
    return !mayUsePageResources;
}
 
Example #19
Source File: PDAcroForm.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Check if there is a translation needed to place the annotations content.
 * The answer is {@code false} only if the appearance stream resources contain a
 * form XObject whose BBox has both lower-left coordinates different from zero.
 *
 * @param appearanceStream the appearance stream whose resources are inspected
 * @return the need for a translation transformation.
 */
private boolean resolveNeedsTranslation(PDAppearanceStream appearanceStream)
{
    PDResources resources = appearanceStream.getResources();
    if (resources == null)
    {
        return true;
    }

    boolean translationRequired = true;
    for (COSName xObjectName : resources.getXObjectNames())
    {
        try
        {
            PDXObject candidate = resources.getXObject(xObjectName);
            if (candidate instanceof PDFormXObject)
            {
                // a BBox that is offset from the origin already positions the
                // content, so no extra translation is needed
                PDRectangle bbox = ((PDFormXObject) candidate).getBBox();
                float lowerLeftX = bbox.getLowerLeftX();
                float lowerLeftY = bbox.getLowerLeftY();
                boolean offsetOnBothAxes =
                        Float.compare(lowerLeftX, 0) != 0 && Float.compare(lowerLeftY, 0) != 0;
                if (offsetOnBothAxes)
                {
                    translationRequired = false;
                }
            }
        }
        catch (IOException e)
        {
            // we can safely ignore the exception here
            // as this might only cause a misplacement
        }
    }
    return translationRequired;
}
 
Example #20
Source File: PDObjectReference.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Gets a higher-level object for the referenced object.
 * Currently this method may return a {@link PDAnnotation},
 * a {@link PDXObject} or <code>null</code>.
 * <p>
 * A stream target is first tried as an XObject; otherwise the target is
 * interpreted as an annotation.
 *
 * @return a higher-level object for the referenced object
 */
public COSObjectable getReferencedObject()
{
    COSBase target = getCOSObject().getDictionaryObject(COSName.OBJ);
    if (!(target instanceof COSDictionary))
    {
        return null;
    }

    COSDictionary targetDictionary = (COSDictionary) target;
    try
    {
        if (target instanceof COSStream)
        {
            PDXObject xobject = PDXObject.createXObject(target, null); // <-- TODO: valid?
            if (xobject != null)
            {
                return xobject;
            }
        }

        PDAnnotation annotation = PDAnnotation.createAnnotation(target);
        /*
         * COSName.TYPE is optional, so if annotation is of type unknown and
         * COSName.TYPE is not COSName.ANNOT it still may be an annotation.
         * TODO shall we return the annotation object instead of null?
         * what else can be the target of the object reference?
         */
        boolean knownAnnotationType = !(annotation instanceof PDAnnotationUnknown);
        if (knownAnnotationType
                || COSName.ANNOT.equals(targetDictionary.getDictionaryObject(COSName.TYPE)))
        {
            return annotation;
        }
    }
    catch (IOException exception)
    {
        // this can only happen if the target is an XObject.
    }
    return null;
}
 
Example #21
Source File: PDPageContentStream.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Draw an xobject(form or image) using the given {@link AffineTransform} to position
 * the xobject. The xobject is added to the resources, and the draw operator is
 * written between a save and a restore of the graphics state.
 *
 * @param xobject The xobject to draw.
 * @param transform the transformation matrix
 * @throws IOException If there is an error writing to the stream.
 * @throws IllegalStateException If the method was called within a text block.
 * @deprecated Use {@link #drawImage(PDImageXObject, Matrix) drawImage(PDImageXObject, Matrix)}
 * or {@link #drawForm(PDFormXObject) drawForm(PDFormXObject)} with
 * {@link #transform(Matrix) transform(Matrix)} instead.
 */
@Deprecated
public void drawXObject(PDXObject xobject, AffineTransform transform) throws IOException
{
    if (inTextMode)
    {
        throw new IllegalStateException("Error: drawXObject is not allowed within a text block.");
    }

    // resource names are prefixed by kind: "Im" for images, "Form" for anything else
    final String namePrefix = (xobject instanceof PDImageXObject) ? "Im" : "Form";
    COSName resourceName = resources.add(xobject, namePrefix);

    saveGraphicsState();
    transform(new Matrix(transform));

    writeOperand(resourceName);
    writeOperator(OperatorName.DRAW_OBJECT);

    restoreGraphicsState();
}
 
Example #22
Source File: DefaultResourceCache.java    From gcs with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Stores the XObject in the cache behind a soft reference.
 *
 * @param indirect the indirect object used as the cache key
 * @param xobject the XObject to cache
 * @throws IOException declared by the overridden method; not thrown by this body
 */
@Override
public void put(COSObject indirect, PDXObject xobject) throws IOException
{
    SoftReference<PDXObject> reference = new SoftReference<PDXObject>(xobject);
    xobjects.put(indirect, reference);
}
 
Example #23
Source File: ResourceCacheWithLimitedImages.java    From pdfcompare with Apache License 2.0 4 votes vote down vote up
/**
 * Reports whether the eldest entry should be removed, which is the case as soon
 * as the number of entries exceeds {@code environment.getNrOfImagesToCache()}.
 *
 * @param eldest the eldest entry; not inspected here
 * @return {@code true} if the current size is above the configured limit
 */
@Override
protected boolean removeEldestEntry(final Entry<COSObject, SoftReference<PDXObject>> eldest) {
    final boolean overCapacity = size() > environment.getNrOfImagesToCache();
    return overCapacity;
}
 
Example #24
Source File: DummyResourceCache.java    From pdfcompare with Apache License 2.0 4 votes vote down vote up
/**
 * Always returns {@code null}: this implementation never reports a cached XObject.
 *
 * @param indirect the indirect object; ignored
 * @return always {@code null}
 * @throws IOException declared by the overridden method; not thrown by this body
 */
@Override
public PDXObject getXObject(final COSObject indirect) throws IOException {
    return null;
}
 
Example #25
Source File: DummyResourceCache.java    From pdfcompare with Apache License 2.0 4 votes vote down vote up
/**
 * Does nothing: the given XObject is not stored.
 *
 * @param indirect the indirect object; ignored
 * @param xobject the XObject; ignored
 * @throws IOException declared by the overridden method; not thrown by this body
 */
@Override
public void put(final COSObject indirect, final PDXObject xobject) throws IOException {}
 
Example #26
Source File: PDPageContentStream.java    From gcs with Mozilla Public License 2.0 3 votes vote down vote up
/**
 * Draw an xobject(form or image) at the x,y coordinates and a certain width and height.
 * The values are combined into an {@link AffineTransform} that scales by width and
 * height and translates by x and y, which is then passed on to
 * {@code drawXObject(PDXObject, AffineTransform)}.
 *
 * @param xobject The xobject to draw.
 * @param x The x-coordinate to draw the image.
 * @param y The y-coordinate to draw the image.
 * @param width The width of the image to draw.
 * @param height The height of the image to draw.
 *
 * @throws IOException If there is an error writing to the stream.
 * @deprecated Use {@link #drawImage} instead.
 */
@Deprecated
public void drawXObject(PDXObject xobject, float x, float y, float width, float height) throws IOException
{
    drawXObject(xobject, new AffineTransform(width, 0, 0, height, x, y));
}
 
Example #27
Source File: PDMarkedContent.java    From gcs with Mozilla Public License 2.0 2 votes vote down vote up
/**
 * Appends an XObject to the contents returned by {@code getContents()}.
 *
 * @param xobject the XObject to add
 */
public void addXObject(PDXObject xobject)
{
    getContents().add(xobject);
}
 
Example #28
Source File: PDObjectReference.java    From gcs with Mozilla Public License 2.0 2 votes vote down vote up
/**
 * Sets the referenced XObject by storing it under the {@code Obj} entry of the
 * underlying COS object.
 *
 * @param xobject the referenced XObject
 */
public void setReferencedObject(PDXObject xobject)
{
    getCOSObject().setItem(COSName.OBJ, xobject);
}
 
Example #29
Source File: ResourceCache.java    From gcs with Mozilla Public License 2.0 2 votes vote down vote up
/**
 * Puts the given indirect XObject resource in the cache.
 * 
 * @param indirect the indirect object of the resource; it identifies the cache entry.
 * @param xobject the XObject resource to store for that indirect object.
 * 
 * @throws IOException if something went wrong.
 */
void put(COSObject indirect, PDXObject xobject) throws IOException;
 
Example #30
Source File: ResourceCache.java    From gcs with Mozilla Public License 2.0 2 votes vote down vote up
/**
 * Returns the XObject resource for the given indirect object, if it is in the cache.
 * 
 * @param indirect the indirect object that identifies the cache entry
 * 
 * @return the XObject resource of the given indirect object; what is returned
 *         when nothing is cached is left to the implementation.
 * @throws IOException if something went wrong.
 */
PDXObject getXObject(COSObject indirect) throws IOException;