Java Code Examples for com.itextpdf.text.pdf.PdfDictionary#getAsDict()

The following examples show how to use com.itextpdf.text.pdf.PdfDictionary#getAsDict(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PortfolioFileExtraction.java    From testarea-itext5 with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Extracts the embedded files of a document into a single flat directory.
 * <p>
 * These two methods ({@link #extractAttachments(PdfReader, String)} and
 * {@link #extractAttachment(PdfReader, File, PdfString, PdfDictionary)})
 * essentially are the OP's original code posted in his question. They
 * extract files without the folder structure.
 * </p>
 * <p>
 * Only a <b>Names</b> array located directly in the <b>EmbeddedFiles</b>
 * dictionary is evaluated. If the catalog has no <b>Names</b> dictionary,
 * no <b>EmbeddedFiles</b> entry, or no such array, the method returns
 * without extracting anything instead of failing.
 * </p>
 *
 * @param reader the document to extract the attachments from
 * @param dir path of the target directory; it is created if missing
 * @throws IOException propagated from the extraction of a single attachment
 */
public static void extractAttachments(PdfReader reader, String dir) throws IOException
{
    File folder = new File(dir);
    folder.mkdirs();

    PdfDictionary root = reader.getCatalog();

    PdfDictionary names = root.getAsDict(PdfName.NAMES);
    if (names == null)
        return;
    System.out.println("" + names.getKeys().toString());

    PdfDictionary embedded = names.getAsDict(PdfName.EMBEDDEDFILES);
    if (embedded == null)
        return;
    System.out.println("" + embedded.toString());

    PdfArray filespecs = embedded.getAsArray(PdfName.NAMES);
    if (filespecs == null)
        return;

    // The array holds alternating (name, file specification) entries; the
    // bound i + 1 keeps a dangling last name from being read past the end.
    for (int i = 0; i + 1 < filespecs.size(); i += 2)
    {
        extractAttachment(reader, folder, filespecs.getAsString(i), filespecs.getAsDict(i + 1));
    }
}
 
Example 2
Source File: PortfolioFileExtraction.java    From testarea-itext5 with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * <p>
 * These two methods ({@link #extractAttachmentsWithFolders(PdfReader, String)} and
 * {@link #extractAttachment(PdfReader, Map, PdfString, PdfDictionary)}) extend the
 * functionality of the OP's original code posted in his question. They extract files
 * with the folder structure.
 * </p>
 * <p>
 * The information concerning the portfolio folder structure is retrieved using
 * the method {@link #retrieveFolders(PdfReader, File)} and its helper method
 * {@link #collectFolders(Map, PdfDictionary, File)}.
 * </p>
 * <p>
 * Only a <b>Names</b> array located directly in the <b>EmbeddedFiles</b>
 * dictionary is evaluated. If the catalog has no <b>Names</b> dictionary,
 * no <b>EmbeddedFiles</b> entry, or no such array, the method returns after
 * the folder structure has been retrieved, without extracting any file.
 * </p>
 *
 * @param reader the portfolio document to extract the attachments from
 * @param dir path of the target base directory; it is created if missing
 * @throws IOException propagated from the extraction of a single attachment
 * @throws DocumentException propagated from the retrieval of the folder structure
 */
public static void extractAttachmentsWithFolders(PdfReader reader, String dir) throws IOException, DocumentException
{
    File folder = new File(dir);
    folder.mkdirs();

    Map<Integer, File> folders = retrieveFolders(reader, folder);

    PdfDictionary root = reader.getCatalog();

    PdfDictionary names = root.getAsDict(PdfName.NAMES);
    if (names == null)
        return;
    System.out.println("" + names.getKeys().toString());

    PdfDictionary embedded = names.getAsDict(PdfName.EMBEDDEDFILES);
    if (embedded == null)
        return;
    System.out.println("" + embedded.toString());

    PdfArray filespecs = embedded.getAsArray(PdfName.NAMES);
    if (filespecs == null)
        return;

    // The array holds alternating (name, file specification) entries; the
    // bound i + 1 keeps a dangling last name from being read past the end.
    for (int i = 0; i + 1 < filespecs.size(); i += 2)
    {
        extractAttachment(reader, folders, folder, filespecs.getAsString(i), filespecs.getAsDict(i + 1));
    }
}
 
Example 3
Source File: PortfolioFileExtraction.java    From testarea-itext5 with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Looks up the folders dictionary in the <b>Collection</b> dictionary of
 * the document catalog and hands it to
 * {@code collectFolders} to fill the returned map.
 *
 * @param reader the document whose catalog is inspected
 * @param baseDir the directory passed on to {@code collectFolders} as base
 * @return the map filled by {@code collectFolders}
 * @throws DocumentException if the catalog has no Collection dictionary or
 *         the collection has no folders dictionary
 */
static Map<Integer, File> retrieveFolders(PdfReader reader, File baseDir) throws DocumentException
{
    PdfDictionary collectionDict = reader.getCatalog().getAsDict(PdfName.COLLECTION);
    if (collectionDict == null)
    {
        throw new DocumentException("Document has no Collection dictionary");
    }

    PdfDictionary rootFolder = collectionDict.getAsDict(FOLDERS);
    if (rootFolder == null)
    {
        throw new DocumentException("Document collection has no folders dictionary");
    }

    Map<Integer, File> foldersById = new HashMap<Integer, File>();
    collectFolders(foldersById, rootFolder, baseDir);
    return foldersById;
}
 
Example 4
Source File: RemappingExtractionFilter.java    From testarea-itext5 with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Replaces the text of the given render info by a remapped string and
 * then forwards the render info to the wrapped strategy.
 * <p>
 * Each byte of the original PDF string is used as an index into the
 * <b>Differences</b> array of the font encoding; the name found there,
 * minus its first two characters, is parsed as a hexadecimal character
 * code. (Presumably the names look like a slash, one prefix letter and hex
 * digits; this is specific to the documents the filter was written for.)
 * </p>
 * <p>
 * If the font has no encoding dictionary or no <b>Differences</b> array,
 * the render info is forwarded unchanged.
 * </p>
 *
 * @param renderInfo the text chunk currently being rendered
 */
@Override
public void renderText(TextRenderInfo renderInfo)
{
    DocumentFont font = renderInfo.getFont();
    PdfDictionary dict = font.getFontDictionary();
    PdfDictionary encoding = dict.getAsDict(PdfName.ENCODING);
    PdfArray diffs = encoding == null ? null : encoding.getAsArray(PdfName.DIFFERENCES);
    if (diffs == null)
    {
        // Nothing to remap with (e.g. a predefined, name-only encoding).
        strategy.renderText(renderInfo);
        return;
    }

    StringBuilder builder = new StringBuilder();
    for (byte b : renderInfo.getPdfString().getBytes())
    {
        // Mask the byte: a plain cast sign-extends values >= 0x80 and
        // would yield an index far outside the array.
        PdfName name = diffs.getAsName(b & 0xFF);
        String s = name.toString().substring(2);
        int i = Integer.parseUnsignedInt(s, 16);
        builder.append((char)i);
    }

    try
    {
        // Overwrite the text held by the render info with the remapped string.
        stringField.set(renderInfo, builder.toString());
    }
    catch (IllegalArgumentException | IllegalAccessException e)
    {
        e.printStackTrace();
    }
    strategy.renderText(renderInfo);
}
 
Example 5
Source File: InsertPage.java    From testarea-itext5 with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * <p>
 * A primitive attempt at copying links from page <code>sourcePage</code>
 * of <code>PdfReader reader</code> to page <code>targetPage</code> of
 * <code>PdfStamper stamper</code>.
 * </p>
 * <p>
 * This method is meant only for the use case at hand, i.e. copying a link
 * to an external URI without expecting any advanced features. Annotations
 * which are not link annotations with a rectangle of at least four entries
 * and a URI action are skipped.
 * </p>
 *
 * @param stamper the stamper receiving the new link annotations
 * @param targetPage the page of the stamper the links are added to
 * @param reader the document the links are read from
 * @param sourcePage the page of the reader whose annotations are inspected
 */
void copyLinks(PdfStamper stamper, int targetPage, PdfReader reader, int sourcePage)
{
    PdfArray sourceAnnots = reader.getPageNRelease(sourcePage).getAsArray(PdfName.ANNOTS);
    if (sourceAnnots == null || sourceAnnots.size() <= 0)
    {
        return;
    }

    for (PdfObject entry : sourceAnnots)
    {
        // Array entries may be indirect references; resolve them first.
        PdfObject resolved = PdfReader.getPdfObject(entry);
        if (!resolved.isDictionary())
        {
            continue;
        }

        PdfDictionary annotDict = (PdfDictionary) resolved;
        PdfName subtype = annotDict.getAsName(PdfName.SUBTYPE);
        if (!PdfName.LINK.equals(subtype))
        {
            continue;
        }

        PdfArray rect = annotDict.getAsArray(PdfName.RECT);
        if (rect == null || rect.size() < 4)
        {
            continue;
        }
        Rectangle linkArea = PdfReader.getNormalizedRectangle(rect);

        // Fall back to inverting highlight if the annotation names none.
        PdfName declaredMode = annotDict.getAsName(PdfName.H);
        PdfName highlightMode = declaredMode != null ? declaredMode : PdfAnnotation.HIGHLIGHT_INVERT;

        PdfDictionary uriAction = annotDict.getAsDict(PdfName.A);
        if (uriAction == null)
        {
            continue;
        }
        if (!PdfName.URI.equals(uriAction.getAsName(PdfName.S)))
        {
            continue;
        }
        PdfString target = uriAction.getAsString(PdfName.URI);
        if (target == null)
        {
            continue;
        }

        PdfAction copiedAction = new PdfAction(target.toString());
        PdfAnnotation copiedLink = PdfAnnotation.createLink(stamper.getWriter(), linkArea, highlightMode, copiedAction);
        stamper.addAnnotation(copiedLink, targetPage);
    }
}
 
Example 6
Source File: ReadFdf.java    From testarea-itext5 with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Prints the <b>Annots</b> array of the <b>FDF</b> dictionary of the given
 * FDF document, asserting that both the dictionary and the array exist.
 *
 * @param fdfReader the FDF document to inspect
 */
void show(FdfReader fdfReader)
{
    PdfDictionary fdfDictionary = fdfReader.getCatalog().getAsDict(PdfName.FDF);
    Assert.assertNotNull("FDF catalogue is null", fdfDictionary);

    PdfArray annotations = fdfDictionary.getAsArray(PdfName.ANNOTS);
    Assert.assertNotNull("FDF annotations are null", annotations);

    System.out.println(annotations);
}
 
Example 7
Source File: TextExtraction.java    From testarea-itext5 with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * <a href="http://stackoverflow.com/questions/37748346/extract-text-with-itext-not-works-encoding-or-crypted-text">
 * Extract text with iText not works: encoding or crypted text?
 * </a>
 * <br/>
 * <a href="https://dl.dropboxusercontent.com/u/6413030/pb.pdf">
 * pb.pdf
 * </a>
 * <p>
 * The document has not been provided by the OP but by
 * <a href="http://stackoverflow.com/users/1127485/sschuberth">sschuberth</a>
 * in a comment.
 * </p>
 * <p>
 * In contrast to {@link #testPb()}, we here first remove the <b>ToUnicode</b>
 * tables of the fonts. And indeed, now extraction succeeds.
 * </p>
 * <p>
 * Only fonts referenced directly from the page resources are treated;
 * font entries which are not dictionaries are skipped.
 * </p>
 */
@Test
public void testPbNoToUnicode() throws Exception
{
    try (   InputStream resourceStream = getClass().getResourceAsStream("pb.pdf")   )
    {
        PdfReader reader = new PdfReader(resourceStream);
        for (int i = 1; i <= reader.getNumberOfPages(); i++)
        {
            PdfDictionary pageResources = reader.getPageResources(i);
            if (pageResources == null)
                continue;
            PdfDictionary pageFonts = pageResources.getAsDict(PdfName.FONT);
            if (pageFonts == null)
                continue;
            for (PdfName key : pageFonts.getKeys())
            {
                PdfDictionary fontDictionary = pageFonts.getAsDict(key);
                if (fontDictionary == null)
                    continue;
                // Drop the ToUnicode map so extraction falls back to the font encoding.
                fontDictionary.remove(PdfName.TOUNICODE);
            }
        }

        String content = extractAndStore(reader, new File(RESULT_FOLDER, "pb-noToUnicode.%s.txt").toString());

        System.out.println("\nText pb.pdf without ToUnicode\n************************");
        System.out.println(content);
        System.out.println("************************");
    }
}
 
Example 8
Source File: PortfolioFileExtraction.java    From testarea-itext5 with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a directory for the given folder dictionary below
 * <code>baseDir</code>, registers it in <code>collection</code> under the
 * folder's <b>ID</b>, and then recurses first into the <b>Next</b> entry
 * (with the same base directory) and afterwards into the child entry
 * (with the newly created directory as base).
 *
 * @param collection the map from folder ID to directory to fill
 * @param folder the folder dictionary to process
 * @param baseDir the directory in which the folder's directory is created
 */
static void collectFolders(Map<Integer, File> collection, PdfDictionary folder, File baseDir)
{
    String folderName = folder.getAsString(PdfName.NAME).toString();
    File folderDir = new File(baseDir, folderName);
    folderDir.mkdirs();

    int folderId = folder.getAsNumber(PdfName.ID).intValue();
    collection.put(folderId, folderDir);

    // The Next entry shares the base directory of this folder.
    PdfDictionary nextFolder = folder.getAsDict(PdfName.NEXT);
    if (nextFolder != null)
    {
        collectFolders(collection, nextFolder, baseDir);
    }

    // The child entry is placed inside the directory just created.
    PdfDictionary childFolder = folder.getAsDict(CHILD);
    if (childFolder != null)
    {
        collectFolders(collection, childFolder, folderDir);
    }
}
 
Example 9
Source File: TransparentGraphicsRemover.java    From testarea-itext5 with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Looks up a graphics state parameter dictionary by its resource name in
 * the <b>ExtGState</b> dictionary of the current resources.
 *
 * @param gsName the resource name of the graphics state dictionary
 * @return the dictionary, or <code>null</code> if the resources have no
 *         <b>ExtGState</b> dictionary or it has no dictionary under that name
 */
PdfDictionary getGraphicsStateDictionary(PdfName gsName) {
    PdfDictionary extGStates = resources.getAsDict(PdfName.EXTGSTATE);
    // Resources without any ExtGState entry simply have no such dictionary.
    if (extGStates == null) {
        return null;
    }
    return extGStates.getAsDict(gsName);
}
 
Example 10
Source File: InsertPage.java    From testarea-itext5 with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * <a href="http://stackoverflow.com/questions/28911509/how-to-retain-page-labels-when-concatenating-an-existing-pdf-with-a-pdf-created">
 * How to retain page labels when concatenating an existing pdf with a pdf created from scratch?
 * </a>
 * <p>
 * A proposal how to implement the task using a {@link PdfStamper}: the
 * first page of the cover document is inserted as new page 1 of the
 * labelled document, its links are copied, and the page label number tree
 * is rebuilt with a "Cover Page" entry for index 0 and all existing
 * indices shifted by one.
 * </p>
 */
@Test
public void testInsertTitlePage() throws IOException, DocumentException
{
    try (   InputStream documentStream = getClass().getResourceAsStream("Labels.pdf");
            InputStream titleStream = getClass().getResourceAsStream("Cover.pdf");
            OutputStream outputStream = new FileOutputStream(new File(RESULT_FOLDER, "labels-with-cover-page.pdf"))    )
    {
        PdfReader coverReader = new PdfReader(titleStream);
        PdfReader mainReader = new PdfReader(documentStream);
        PdfStamper stamper = new PdfStamper(mainReader, outputStream);

        // Put the cover on a fresh first page and bring its links along.
        PdfImportedPage coverPage = stamper.getImportedPage(coverReader, 1);
        stamper.insertPage(1, coverReader.getPageSize(1));
        PdfContentByte underContent = stamper.getUnderContent(1);
        underContent.addTemplate(coverPage, 0, 0);
        copyLinks(stamper, 1, coverReader, 1);

        PdfDictionary pageLabels = mainReader.getCatalog().getAsDict(PdfName.PAGELABELS);
        if (pageLabels != null)
        {
            PdfDictionary coverLabel = new PdfDictionary();
            coverLabel.put(PdfName.P, new PdfString("Cover Page"));

            PdfArray shiftedNums = new PdfArray();
            shiftedNums.add(new PdfNumber(0));
            shiftedNums.add(coverLabel);

            PdfArray oldNums = pageLabels.getAsArray(PdfName.NUMS);
            if (oldNums != null)
            {
                // Entries come in (page index, label dictionary) pairs.
                for (int pos = 0; pos + 1 < oldNums.size(); pos += 2)
                {
                    int pageIndex = oldNums.getAsNumber(pos).intValue();
                    shiftedNums.add(new PdfNumber(pageIndex + 1));
                    shiftedNums.add(oldNums.getPdfObject(pos + 1));
                }
            }

            pageLabels.put(PdfName.NUMS, shiftedNums);
            stamper.markUsed(pageLabels);
        }

        stamper.close();
    }
}
 
Example 11
Source File: SearchActionJavaScript.java    From testarea-itext5 with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * <a href="http://stackoverflow.com/questions/41090131/searching-pdf-for-a-specific-string-in-javascript-action-in-itext">
 * Searching PDF for a specific string in JavaScript action in iText
 * </a>
 * <br/>
 * <a href="http://www21.zippyshare.com/v/RDdOJI97/file.html">
 * file.pdf
 * </a>
 * <p>
 * This test shows how to process the immediate JavaScript code in annotation actions.
 * For every annotation of every page one line is printed stating whether
 * it has an action, whether that action is a JavaScript action, and - if
 * so - the script itself.
 * </p>
 */
@Test
public void testSearchJsActionInFile() throws IOException
{
    try (   InputStream resource = getClass().getResourceAsStream("file.pdf")   )
    {
        System.out.println("file.pdf - Looking for special JavaScript actions.");
        // Reads and parses a PDF document
        PdfReader reader = new PdfReader(resource);

        // For each PDF page
        for (int i = 1; i <= reader.getNumberOfPages(); i++)
        {
            System.out.printf("\nPage %d\n", i);
            // Get the page dictionary of page i
            PdfDictionary page = reader.getPageN(i);
            // Get all the annotations of page i
            PdfArray annotsArray = page.getAsArray(PdfName.ANNOTS);

            // If page does not have annotations
            if (annotsArray == null)
            {
                System.out.printf("No annotations.\n");
                continue;
            }

            // For each annotation
            for (int j = 0; j < annotsArray.size(); ++j)
            {
                System.out.printf("Annotation %d - ", j);

                // Current annotation; null if the array entry is not a dictionary
                PdfDictionary curAnnot = annotsArray.getAsDict(j);

                // An entry which is no dictionary cannot carry an action either
                PdfDictionary annotationAction = curAnnot == null ? null : curAnnot.getAsDict(PdfName.A);
                if (annotationAction == null)
                {
                    System.out.print("no action");
                }
                // test if it is a JavaScript action; getAsName also
                // resolves an indirectly referenced action type
                else if (PdfName.JAVASCRIPT.equals(annotationAction.getAsName(PdfName.S)))
                {
                    PdfObject scriptObject = annotationAction.getDirectObject(PdfName.JS);
                    if (scriptObject == null)
                    {
                        // println, as the continue skips the line end below
                        System.out.println("missing JS entry");
                        continue;
                    }
                    final String script;
                    if (scriptObject.isString())
                        script = ((PdfString)scriptObject).toUnicodeString();
                    else if (scriptObject.isStream())
                    {
                        try (   ByteArrayOutputStream baos = new ByteArrayOutputStream()    )
                        {
                            ((PdfStream)scriptObject).writeContent(baos);
                            script = baos.toString("ISO-8859-1");
                        }
                    }
                    else
                    {
                        System.out.println("malformed JS entry");
                        continue;
                    }

                    if (script.contains("if (this.hostContainer) { try {"))
                        System.out.print("contains test string - ");

                    System.out.printf("\n---\n%s\n---", script);
                    // what here?
                }
                else
                {
                    System.out.print("no JavaScript action");
                }
                System.out.println();
            }
        }
    }
}
 
Example 12
Source File: ProcessLink.java    From testarea-itext5 with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/49370352/how-do-i-get-a-get-destination-page-of-a-link-in-pdf-file">
 * How do I get a get destination page of a link in PDF file?
 * </a>
 * <br/>
 * local-link.pdf - output of the test {@link CreateLink}.
 * <p>
 * This test shows how to access data of the target page, once by
 * directly reading from the page dictionary referenced from the
 * link destination, once by first determining the page number and
 * then using {@link PdfReader} helper methods.
 * </p>
 * <p>
 * The destination array is taken from the <b>Dest</b> entry of each link
 * annotation of page 1 or, if that is missing, from the <b>D</b> entry of
 * its action.
 * </p>
 */
@Test
public void testDetermineTargetPage() throws IOException {
    try (   InputStream src = getClass().getResourceAsStream("local-link.pdf")  ) {
        PdfReader reader = new PdfReader(src);
        PdfArray pageAnnots = reader.getPageN(1).getAsArray(PdfName.ANNOTS);

        for (int index = 0; index < pageAnnots.size(); index++) {
            PdfDictionary annot = pageAnnots.getAsDict(index);
            if (!PdfName.LINK.equals(annot.getAsName(PdfName.SUBTYPE))) {
                continue;
            }

            // Prefer the direct destination, else look into the action.
            PdfArray destination = annot.getAsArray(PdfName.DEST);
            if (destination == null) {
                PdfDictionary linkAction = annot.getAsDict(PdfName.A);
                destination = linkAction == null ? null : linkAction.getAsArray(PdfName.D);
            }
            if (destination == null || destination.size() <= 0) {
                continue;
            }

            System.out.println("Next destination -");
            PdfIndirectReference targetRef = destination.getAsIndirectObject(0);

            // Work with target dictionary directly
            PdfDictionary targetPageDict = destination.getAsDict(0);
            PdfArray cropOrMedia = targetPageDict.getAsArray(PdfName.CROPBOX);
            if (cropOrMedia == null) {
                cropOrMedia = targetPageDict.getAsArray(PdfName.MEDIABOX);
            }
            Rectangle targetBox = PdfReader.getNormalizedRectangle(cropOrMedia);
            System.out.printf("* Target page object %s has cropbox %s\n", targetRef, targetBox);

            // Work via page number: find the page whose original
            // reference equals the destination's reference.
            final int pageCount = reader.getNumberOfPages();
            for (int pageNr = 1; pageNr <= pageCount; pageNr++) {
                PRIndirectReference candidate = reader.getPageOrigRef(pageNr);
                boolean sameGeneration = candidate.getGeneration() == targetRef.getGeneration();
                if (!sameGeneration || candidate.getNumber() != targetRef.getNumber()) {
                    continue;
                }
                System.out.printf("* Target page %s has cropbox %s\n", pageNr, reader.getCropBox(pageNr));
                break;
            }
        }
    }
}