Java Code Examples for org.apache.pdfbox.cos.COSDictionary#getItem()

The following examples show how to use org.apache.pdfbox.cos.COSDictionary#getItem(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PDResources.java    From gcs with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Returns the resource with the given name and kind as an indirect object, or null.
 *
 * @param kind the key under which the sub-dictionary to search is stored in the resources
 * @param name the key of the wanted entry inside that sub-dictionary
 * @return the entry as a {@code COSObject}; {@code null} if the sub-dictionary is missing or
 *         is not a dictionary, or if the entry is missing or is not an indirect object
 */
private COSObject getIndirect(COSName kind, COSName name)
{
    // A malformed file may store something other than a dictionary under "kind";
    // treat that like a missing sub-dictionary instead of failing with a ClassCastException.
    COSBase kindEntry = resources.getDictionaryObject(kind);
    if (!(kindEntry instanceof COSDictionary))
    {
        return null;
    }
    COSBase base = ((COSDictionary) kindEntry).getItem(name);
    if (base instanceof COSObject)
    {
        return (COSObject) base;
    }
    // not an indirect object. Resource may have been added at runtime.
    return null;
}
 
Example 2
Source File: COSParser.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Adds the object ids of the listed dictionary entries to the given set.
 * Only entries that are stored as {@code COSObject} are considered; all others are skipped.
 *
 * @param excludeObjects keys of the entries to look up, may be {@code null} (nothing is done)
 * @param dict the dictionary the entries are read from
 * @param parsedObjects the set receiving the ids obtained through {@code getObjectId}
 */
private void addExcludedToList(COSName[] excludeObjects, COSDictionary dict, final Set<Long> parsedObjects)
{
    if (excludeObjects == null)
    {
        return;
    }
    for (int i = 0; i < excludeObjects.length; i++)
    {
        COSBase entry = dict.getItem(excludeObjects[i]);
        if (!(entry instanceof COSObject))
        {
            // only COSObject entries yield an object id
            continue;
        }
        parsedObjects.add(getObjectId((COSObject) entry));
    }
}
 
Example 3
Source File: PDOptionalContentProperties.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Returns the base state for optional content groups.
 *
 * @return the base state that {@code BaseState.valueOf} derives from the
 *         {@code COSName.BASE_STATE} entry of the dictionary returned by {@code getD()}
 */
public BaseState getBaseState()
{
    COSDictionary d = getD();
    // getCOSName() dereferences an indirect reference and yields null for anything that is
    // not a name, so an indirectly stored or malformed entry no longer raises a
    // ClassCastException; it is passed on as null, exactly like a missing entry.
    COSName name = d.getCOSName(COSName.BASE_STATE);
    return BaseState.valueOf(name);
}
 
Example 4
Source File: PDDocument.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Reads the default resource dictionary /DR of the given dictionary and sets or updates
 * the default resources of the AcroForm with it.
 * <p>
 * If the form has no default resources yet, the new /DR dictionary is stored in the form's
 * dictionary, made direct and marked as needing an update. Otherwise only the /XObject
 * entries are merged into the existing /DR, and only if both /XObject entries are
 * dictionaries.
 * </p>
 *
 * @param acroForm the form whose default resources are set or updated
 * @param newDict the dictionary the new /DR entry is read from
 */
private void assignAcroFormDefaultResource(PDAcroForm acroForm, COSDictionary newDict)
{
    COSBase drEntry = newDict.getDictionaryObject(COSName.DR);
    if (!(drEntry instanceof COSDictionary))
    {
        // no /DR dictionary available, nothing to set or update
        return;
    }

    COSDictionary incomingDR = (COSDictionary) drEntry;
    PDResources existingResources = acroForm.getDefaultResources();
    if (existingResources != null)
    {
        // the form already has default resources: merge the XObject entries only
        COSDictionary currentDR = existingResources.getCOSObject();
        COSBase incomingXObjects = incomingDR.getItem(COSName.XOBJECT);
        COSBase currentXObjects = currentDR.getItem(COSName.XOBJECT);
        boolean bothAreDictionaries = incomingXObjects instanceof COSDictionary
                && currentXObjects instanceof COSDictionary;
        if (bothAreDictionaries)
        {
            ((COSDictionary) currentXObjects).addAll((COSDictionary) incomingXObjects);
            currentDR.setNeedToBeUpdated(true);
        }
        return;
    }

    // the form has no default resources yet: take over the new /DR as a whole
    acroForm.getCOSObject().setItem(COSName.DR, incomingDR);
    incomingDR.setDirect(true);
    incomingDR.setNeedToBeUpdated(true);
}
 
Example 5
Source File: SecurityHandler.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Decrypts the values of a dictionary.
 * <p>
 * A dictionary that has a /CF entry is left untouched. For a signature dictionary the
 * /Contents entry is skipped. Of the remaining values only strings, arrays and
 * dictionaries are passed on to {@code decrypt}.
 * </p>
 *
 * @param dictionary The dictionary to decrypt.
 * @param objNum The object number.
 * @param genNum The object generation number.
 *
 * @throws IOException If there is an error creating a new string.
 */
private void decryptDictionary(COSDictionary dictionary, long objNum, long genNum) throws IOException
{
    // PDFBOX-2936: avoid orphan /CF dictionaries found in US govt "I-" files
    if (dictionary.getItem(COSName.CF) != null)
    {
        return;
    }

    COSBase type = dictionary.getDictionaryObject(COSName.TYPE);
    boolean isSignature = COSName.SIG.equals(type) || COSName.DOC_TIME_STAMP.equals(type);
    if (!isSignature)
    {
        // PDFBOX-4466: /Type is optional, see
        // https://ec.europa.eu/cefdigital/tracker/browse/DSS-1538
        isSignature = dictionary.getDictionaryObject(COSName.CONTENTS) instanceof COSString
                && dictionary.getDictionaryObject(COSName.BYTERANGE) instanceof COSArray;
    }

    for (Map.Entry<COSName, COSBase> entry : dictionary.entrySet())
    {
        // do not decrypt the signature contents string
        boolean signatureContents = isSignature && COSName.CONTENTS.equals(entry.getKey());
        if (!signatureContents)
        {
            COSBase value = entry.getValue();
            // within a dictionary only the following kind of COS objects have to be decrypted
            boolean needsDecryption = value instanceof COSString
                    || value instanceof COSArray
                    || value instanceof COSDictionary;
            if (needsDecryption)
            {
                decrypt(value, objNum, genNum);
            }
        }
    }
}
 
Example 6
Source File: RemoveStrikeoutComment.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/45812696/pdfbox-delete-comment-maintain-strikethrough">
 * PDFBox delete comment maintain strikethrough
 * </a>
 * <br/>
 * <a href="https://expirebox.com/files/3d955e6df4ca5874c38dbf92fc43b5af.pdf">
 * only_fields.pdf
 * </a>
 * <a href="https://file.io/DTvqhC">
 * (alternative download)
 * </a>
 * <p>
 * The OP only wanted the comment removed, not the strike-through. Thus, we must
 * not remove the annotation but merely the comment building attributes.
 * </p>
 * <p>
 * For every StrikeOut annotation the popup reference, the plain and rich text
 * comments and the author are removed from the annotation dictionary; the popup
 * objects collected so far are then removed from the page's annotation list.
 * The document is opened in the try-with-resources statement so that it is
 * closed again even if processing or saving fails.
 * </p>
 *
 * @throws IOException if loading, processing or saving the document fails
 */
@Test
public void testRemoveLikeStephanImproved() throws IOException {
    final COSName POPUP = COSName.getPDFName("Popup");
    try (InputStream resource = getClass().getResourceAsStream("only_fields.pdf");
         PDDocument document = Loader.loadPDF(resource)) {
        PDPageTree allPages = document.getDocumentCatalog().getPages();

        // popup objects of the processed annotations, accumulated over all pages
        List<COSObjectable> objectsToRemove = new ArrayList<>();

        for (int i = 0; i < allPages.getCount(); i++) {
            PDPage page = allPages.get(i);
            List<PDAnnotation> annotations = page.getAnnotations();

            for (PDAnnotation annotation : annotations) {
                if ("StrikeOut".equals(annotation.getSubtype()))
                {
                    COSDictionary annotationDict = annotation.getCOSObject();
                    // remember the popup before its reference is dropped from the dictionary
                    COSBase popup = annotationDict.getItem(POPUP);
                    annotationDict.removeItem(POPUP);
                    annotationDict.removeItem(COSName.CONTENTS); // plain text comment
                    annotationDict.removeItem(COSName.RC);       // rich text comment
                    annotationDict.removeItem(COSName.T);        // author

                    if (popup != null) {
                        objectsToRemove.add(popup);
                    }
                }
            }

            annotations.removeAll(objectsToRemove);
        }

        document.save(new File(RESULT_FOLDER, "only_fields-removeImproved.pdf"));
    }
}
 
Example 7
Source File: OptimizeAfterMerge.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/53420344/ho-to-reduce-the-size-of-merged-pdf-a1-b-files-with-pdfbox-or-other-java-library">
 * Ho to reduce the size of merged PDF A1/b Files with pdfbox or other java library
 * </a>
 * <br/>
 * <a href="https://datentransfer.sparkassenverlag.de/my/transfers/5q8eskgne52npemx8kid7728zk1hq3f993dfat8his">
 * dummy.pdf
 * </a>
 * <p>
 * This is the code the OP himself posted as his solution. This only works if
 * (a) all font programs embedded for the same name indeed are identical, and
 * if (b) all fonts to consider are in the immediate page resources, not the
 * resources of some referred to xobject or pattern. If those conditions are
 * fulfilled, though, it very likely is much faster than the approach in
 * {@link #optimize(PDDocument)}. For the example file provided by the OP,
 * its result is nearly as small.
 * </p>
 * <p>
 * Pages without resources or without a font dictionary, and font entries that
 * are not dictionaries, are skipped. The document is opened in the
 * try-with-resources statement so that it is closed again even if processing
 * or saving fails.
 * </p>
 *
 * @throws IOException if loading, processing or saving the document fails
 */
@Test
public void testOptimizeLikeSchowaveDummy() throws IOException {
    try (InputStream resource = getClass().getResourceAsStream("dummy.pdf");
         PDDocument doc = Loader.loadPDF(resource)) {

        // first font file seen for each font name; later fonts of that name reuse it
        Map<String, COSBase> fontFileCache = new HashMap<>();
        for (int pageNumber = 0; pageNumber < doc.getNumberOfPages(); pageNumber++) {
            final PDPage page = doc.getPage(pageNumber);
            if (page.getResources() == null) {
                continue;
            }
            COSBase fontsEntry = page.getResources().getCOSObject().getDictionaryObject(COSName.FONT);
            if (!(fontsEntry instanceof COSDictionary)) {
                // page uses no fonts of its own
                continue;
            }
            COSDictionary pageDictionary = (COSDictionary) fontsEntry;
            for (COSName currentFont : pageDictionary.keySet()) {
                COSBase fontEntry = pageDictionary.getDictionaryObject(currentFont);
                if (!(fontEntry instanceof COSDictionary)) {
                    continue;
                }
                COSDictionary fontDictionary = (COSDictionary) fontEntry;
                for (COSName actualFont : fontDictionary.keySet()) {
                    COSBase actualFontDictionaryObject = fontDictionary.getDictionaryObject(actualFont);
                    if (actualFontDictionaryObject instanceof COSDictionary) {
                        COSDictionary fontFile = (COSDictionary) actualFontDictionaryObject;
                        COSBase fontNameEntry = fontFile.getItem(COSName.FONT_NAME);
                        if (fontNameEntry instanceof COSName) {
                            String fontName = ((COSName) fontNameEntry).getName();
                            // computeIfAbsent already returns the cached (or freshly cached) value
                            COSBase sharedFontFile = fontFileCache.computeIfAbsent(fontName,
                                    key -> fontFile.getItem(COSName.FONT_FILE2));
                            fontFile.setItem(COSName.FONT_FILE2, sharedFontFile);
                        }
                    }
                }
            }
        }

        doc.save(new File(RESULT_FOLDER, "dummy-optimized-like-schowave.pdf"));
    }
}
 
Example 8
Source File: COSParser.java    From gcs with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Reads the trailer information and provides it as a COSDictionary.
 * <p>
 * The trailer is parsed from the cross reference information found at the startxref
 * offset. In lenient mode it is rebuilt instead if there is no such offset, if parsing
 * fails with an IOException, or if the parsed trailer has no /Root entry. When no
 * rebuild takes place, decryption is prepared and, if object key offsets from a brute
 * force search are present, the search for object streams is run.
 * </p>
 *
 * @return a COSDictionary containing the trailer information
 * @throws IOException if something went wrong
 */
protected COSDictionary retrieveTrailer() throws IOException
{
    COSDictionary parsedTrailer = null;
    boolean needsRebuild = false;
    try
    {
        // parse startxref
        // TODO FDF files don't have a startxref value, so that rebuildTrailer is triggered
        final long xrefOffset = getStartxrefOffset();
        if (xrefOffset <= -1)
        {
            needsRebuild = isLenient();
        }
        else
        {
            parsedTrailer = parseXref(xrefOffset);
        }
    }
    catch (IOException exception)
    {
        if (!isLenient())
        {
            throw exception;
        }
        needsRebuild = true;
    }

    // check if the trailer contains a Root object
    boolean rootMissing = parsedTrailer != null && parsedTrailer.getItem(COSName.ROOT) == null;
    if (rootMissing)
    {
        needsRebuild = isLenient();
    }

    if (needsRebuild)
    {
        return rebuildTrailer();
    }

    // prepare decryption if necessary
    prepareDecryption();
    if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffsets.isEmpty())
    {
        bfSearchForObjStreams();
    }
    return parsedTrailer;
}
 
Example 9
Source File: PDFMergerUtility.java    From gcs with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Update the Pg and Obj references to the new (merged) page.
 * <p>
 * A /Pg or /Obj dictionary that is a key of the mapping is replaced by its mapped
 * counterpart. An /Obj dictionary without a mapping is cloned for the new document.
 * Afterwards the /K entry is processed in the same way if it is an array or a dictionary.
 * </p>
 *
 * @param cloner utility used to clone /Obj dictionaries that have no mapping
 * @param parentTreeEntry the dictionary whose references are updated
 * @param objMapping mapping between old and new references
 * @throws IOException declared for the cloning and the processing of the /K entry
 */
private void updatePageReferences(PDFCloneUtility cloner,
        COSDictionary parentTreeEntry, Map<COSDictionary, COSDictionary> objMapping)
        throws IOException
{
    COSDictionary oldPage = parentTreeEntry.getCOSDictionary(COSName.PG);
    if (objMapping.containsKey(oldPage))
    {
        parentTreeEntry.setItem(COSName.PG, objMapping.get(oldPage));
    }

    COSBase obj = parentTreeEntry.getDictionaryObject(COSName.OBJ);
    if (obj instanceof COSDictionary)
    {
        COSDictionary objDict = (COSDictionary) obj;
        if (!objMapping.containsKey(objDict))
        {
            // PDFBOX-3999: clone objects that are not in mapping to make sure that
            // these don't remain attached to the source document
            StringBuilder message =
                    new StringBuilder("clone potential orphan object in structure tree");
            COSBase item = parentTreeEntry.getItem(COSName.OBJ);
            if (item instanceof COSObject)
            {
                // anything else is not displayed in full because of stack overflow
                message.append(": ").append(item);
            }
            message.append(", Type: ").append(objDict.getNameAsString(COSName.TYPE));
            message.append(", Subtype: ").append(objDict.getNameAsString(COSName.SUBTYPE));
            message.append(", T: ").append(objDict.getNameAsString(COSName.T));
            LOG.debug(message.toString());

            parentTreeEntry.setItem(COSName.OBJ, cloner.cloneForNewDocument(obj));
        }
        else
        {
            parentTreeEntry.setItem(COSName.OBJ, objMapping.get(objDict));
        }
    }

    COSBase kids = parentTreeEntry.getDictionaryObject(COSName.K);
    if (kids instanceof COSArray)
    {
        updatePageReferences(cloner, (COSArray) kids, objMapping);
    }
    else if (kids instanceof COSDictionary)
    {
        updatePageReferences(cloner, (COSDictionary) kids, objMapping);
    }
}