org.apache.pdfbox.pdmodel.font.PDType3Font Java Examples

The following examples show how to use org.apache.pdfbox.pdmodel.font.PDType3Font. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PDFExtractor.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the outline of a text position from its font's bounding box.
 * <p>
 * The rectangle is created in glyph space (advance width of the first character code by the
 * vertical extent of the font bounding box), mapped through the text rendering matrix, and
 * finally passed through the {@code flipAT} and {@code rotateAT} transforms that are declared
 * outside this method.
 * </p>
 *
 * @param text the text position to compute the bounds for
 * @return the transformed outline of the text position
 * @throws IOException if the font bounding box or the glyph width cannot be read
 */
private Shape calculateFontBounds(TextPosition text) throws IOException
{
    // glyph space -> user space; despite its name, getTextMatrix() returns the
    // Text Rendering Matrix rather than the Text Matrix
    AffineTransform glyphToUser = text.getTextMatrix().createAffineTransform();

    // the font bounding box y values give the real vertical bounds; usually this is the
    // height seen when marking text in Adobe Reader
    PDFont font = text.getFont();
    BoundingBox fontBox = font.getBoundingBox();

    // advance width and bbox height, both in glyph space
    // todo: only the first character code is considered; all codes should be iterated
    int firstCode = text.getCharacterCodes()[0];
    float advance = font.getWidth(firstCode);
    float bottom = fontBox.getLowerLeftY();
    float height = fontBox.getHeight();
    Rectangle2D.Float glyphRect = new Rectangle2D.Float(0, bottom, advance, height);

    if (!(font instanceof PDType3Font)) {
        // bbox and font matrix are already scaled to 1000
        glyphToUser.scale(1 / 1000f, 1 / 1000f);
    }
    else {
        // bbox and font matrix are unscaled
        glyphToUser.concatenate(font.getFontMatrix().createAffineTransform());
    }

    Shape outline = glyphToUser.createTransformedShape(glyphRect);
    Shape flipped = flipAT.createTransformedShape(outline);
    return rotateAT.createTransformedShape(flipped);
}
 
Example #2
Source File: TextStampRecognizer.java    From pdf-unstamper with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Checks whether any keyword, encoded with any of the given fonts, is found in the input text.
 * <p>
 * Null fonts, Type 3 fonts and Type 0 fonts whose descendant is a {@code PDCIDFontType0} are
 * skipped. A keyword that fails to encode or compare with a font is skipped as well.
 * </p>
 *
 * @param keywords  the keywords to look for
 * @param inputText the raw text bytes to search in
 * @param pdFonts   the candidate fonts used to encode each keyword
 * @param useStrict forwarded unchanged to {@code checkDuplicate}
 * @return {@code true} as soon as {@code checkDuplicate} reports a match, {@code false} otherwise
 */
private static boolean recognizeWithFont(
        @NotNull String[] keywords,
        @NotNull byte[] inputText,
        @NotNull Set<PDFont> pdFonts,
        @NotNull boolean useStrict) {
    final String inputByteString = generateByteString(inputText);

    for (PDFont font : pdFonts) {
        if (font == null) {
            continue;
        }

        /* do not encode unsupported font */
        final boolean hasCidType0Descendant = font instanceof PDType0Font
                && ((PDType0Font) font).getDescendantFont() instanceof PDCIDFontType0;
        if (hasCidType0Descendant || font instanceof PDType3Font) {
            continue;
        }

        for (String keyword : keywords) {
            boolean matched;
            try {
                final byte[] keywordBytes = font.encode(keyword);
                final String keywordByteString = generateByteString(keywordBytes);
                matched = checkDuplicate(inputByteString, keywordByteString, useStrict);
            } catch (Exception ignored) {
                // best effort: a keyword this font cannot handle is simply not a match
                matched = false;
            }
            if (matched) {
                return true;
            }
        }
    }
    return false;
}
 
Example #3
Source File: PageDrawer.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Shows a Type 3 glyph unless the current text rendering mode is
 * {@code RenderingMode.NEITHER}, in which case the glyph is skipped entirely.
 */
@Override
protected void showType3Glyph(Matrix textRenderingMatrix, PDType3Font font, int code,
        String unicode, Vector displacement) throws IOException
{
    RenderingMode currentMode = getGraphicsState().getTextState().getRenderingMode();
    if (RenderingMode.NEITHER.equals(currentMode))
    {
        // nothing to show in this rendering mode
        return;
    }
    super.showType3Glyph(textRenderingMatrix, font, code, unicode, displacement);
}
 
Example #4
Source File: PDFStreamEngine.java    From gcs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Called when a glyph is to be processed. This method is intended for overriding in subclasses.
 * The default implementation dispatches on the font kind: Type 3 fonts are passed to
 * {@code showType3Glyph}, every other font to {@code showFontGlyph}.
 *
 * @param textRenderingMatrix the current text rendering matrix, T<sub>rm</sub>
 * @param font the current font
 * @param code internal PDF character code for the glyph
 * @param unicode the Unicode text for this glyph, or null if the PDF does not provide it
 * @param displacement the displacement (i.e. advance) of the glyph in text space
 * @throws IOException if the glyph cannot be processed
 */
protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode,
                         Vector displacement) throws IOException
{
    if (!(font instanceof PDType3Font))
    {
        showFontGlyph(textRenderingMatrix, font, code, unicode, displacement);
        return;
    }

    PDType3Font type3Font = (PDType3Font) font;
    showType3Glyph(textRenderingMatrix, type3Font, code, unicode, displacement);
}
 
Example #5
Source File: PdfRenderer.java    From gcs with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Fills a highlight shape over every occurrence of {@code mTextToHighlight} in the given text.
 * <p>
 * The text is lower-cased before searching. For each matched character index the
 * corresponding entry of {@code textPositions} is turned into a rectangle (advance width of
 * its first character code by the font bounding box height), transformed into page space,
 * flipped vertically and filled on {@code mGC}. When the page is rotated, {@code mGC} is
 * translated and rotated for the fill and restored afterwards.
 * </p>
 * NOTE(review): indexing assumes one text position per character of the lower-cased
 * text - confirm against the caller.
 *
 * @param text          the text of the current run
 * @param textPositions the positions belonging to {@code text}
 * @throws IOException if font metrics cannot be read
 */
@Override
protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
    text = text.toLowerCase();
    int matchStart = text.indexOf(mTextToHighlight);
    if (matchStart == -1) {
        return;
    }

    PDPage page = getCurrentPage();
    PDRectangle pageBox = page.getBBox();

    // mirror the y axis around the page bounding box height
    AffineTransform flip = new AffineTransform();
    flip.translate(0, pageBox.getHeight());
    flip.scale(1, -1);

    PDRectangle mediaBox = page.getMediaBox();
    float mediaHeight = mediaBox.getHeight();
    float mediaWidth = mediaBox.getWidth();
    int positionCount = textPositions.size();

    do {
        int matchEnd = matchStart + mTextToHighlight.length() - 1;
        for (int i = matchStart; i <= matchEnd; i++) {
            TextPosition position = textPositions.get(i);
            PDFont font = position.getFont();
            BoundingBox fontBox = font.getBoundingBox();

            // glyph-space rectangle: first code's advance width by the font bbox height
            float bottom = fontBox.getLowerLeftY();
            float advance = font.getWidth(position.getCharacterCodes()[0]);
            float height = fontBox.getHeight();
            Rectangle2D.Float glyphRect = new Rectangle2D.Float(0, bottom, advance, height);

            AffineTransform glyphToPage = position.getTextMatrix().createAffineTransform();
            if (font instanceof PDType3Font) {
                glyphToPage.concatenate(font.getFontMatrix().createAffineTransform());
            } else {
                glyphToPage.scale(1 / 1000.0f, 1 / 1000.0f);
            }
            Shape highlight = flip.createTransformedShape(glyphToPage.createTransformedShape(glyphRect));

            AffineTransform savedTransform = mGC.getTransform();
            int rotation = page.getRotation();
            boolean rotated = rotation != 0;
            if (rotated) {
                // shift the origin so the rotated page stays inside the drawing area
                if (rotation == 90) {
                    mGC.translate(mediaHeight, 0);
                } else if (rotation == 270) {
                    mGC.translate(0, mediaWidth);
                } else if (rotation == 180) {
                    mGC.translate(mediaWidth, mediaHeight);
                }
                mGC.rotate(Math.toRadians(rotation));
            }
            mGC.fill(highlight);
            if (rotated) {
                mGC.setTransform(savedTransform);
            }
        }
        // continue searching after this match only while text positions remain
        matchStart = matchEnd < positionCount - 1 ? text.indexOf(mTextToHighlight, matchEnd + 1) : -1;
    } while (matchStart != -1);
}
 
Example #6
Source File: RenderType3Character.java    From testarea-pdfbox2 with Apache License 2.0 4 votes vote down vote up
/**
     * <a href="http://stackoverflow.com/questions/42032729/render-type3-font-character-as-image-using-pdfbox">
     * Render Type3 font character as image using PDFBox
     * </a>
     * <br/>
     * <a href="https://drive.google.com/file/d/0B0f6X4SAMh2KRDJTbm4tb3E1a1U/view">
     * 4700198773.pdf
     * </a>
     * from
     * <a href="http://stackoverflow.com/questions/37754112/extract-text-with-custom-font-result-non-readble">
     * extract text with custom font result non readble
     * </a>
     * <p>
     * This test shows how one can render individual Type 3 font glyphs as bitmaps.
     * Unfortunately PDFBox out-of-the-box does not provide a class to render contents
     * of arbitrary XObjects, merely for rendering pages; thus, we simply create a page
     * with the glyph in question and render that page.
     * </p>
     * <p>
     * As the OP did not provide a sample PDF, we simply use one from another
     * stackoverflow question. There obviously might remain issues with the
     * OP's files.
     * </p>
     * <p>
     * For every char proc of font F1 on the first page that has a code in the font's
     * encoding, a one-page document is created, the glyph is shown on it, and the page
     * is written both as PNG and as PDF into {@code RESULT_FOLDER}.
     * </p>
     */
    @Test
    public void testRender4700198773() throws IOException, NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException
    {
        // write(String) is looked up on the superclass of PDPageContentStream and made
        // accessible reflectively so that a raw show-text operator can be emitted
        Method writeMethod = PDPageContentStream.class.getSuperclass().getDeclaredMethod("write", String.class);
        writeMethod.setAccessible(true);

        // both the stream and the source document are closed even when rendering fails
        try (   InputStream resource = getClass().getResourceAsStream("4700198773.pdf");
                PDDocument document = Loader.loadPDF(resource))
        {
            PDPage page = document.getPage(0);
            PDResources pageResources = page.getResources();
            COSName f1Name = COSName.getPDFName("F1");
            PDType3Font fontF1 = (PDType3Font) pageResources.getFont(f1Name);
            Map<String, Integer> f1NameToCode = fontF1.getEncoding().getNameToCodeMap();

            COSDictionary charProcsDictionary = fontF1.getCharProcs();
            for (COSName key : charProcsDictionary.keySet())
            {
                COSStream stream = (COSStream) charProcsDictionary.getDictionaryObject(key);
                PDType3CharProc charProc = new PDType3CharProc(fontF1, stream);
                PDRectangle bbox = charProc.getGlyphBBox();
                if (bbox == null)
                {
                    bbox = charProc.getBBox();
                }
                Integer code = f1NameToCode.get(key.getName());

                // glyphs without a code in the encoding cannot be addressed by a text operator
                if (code == null)
                {
                    continue;
                }

                try (PDDocument charDocument = new PDDocument())
                {
                    PDPage charPage = new PDPage(bbox);
                    charDocument.addPage(charPage);
                    charPage.setResources(pageResources);

                    // the content stream must be closed before the page is rendered or saved
                    try (PDPageContentStream charContentStream = new PDPageContentStream(charDocument, charPage))
                    {
                        charContentStream.beginText();
                        charContentStream.setFont(fontF1, bbox.getHeight());
                        // zero-padded to two hex digits: a space-padded single digit such as
                        // "< A>" would be read as <A0> because whitespace is ignored and a
                        // missing final digit is assumed to be 0
                        writeMethod.invoke(charContentStream, String.format("<%02X> Tj\n", code));
                        charContentStream.endText();
                    }

                    File result = new File(RESULT_FOLDER, String.format("4700198773-%s-%s.png", key.getName(), code));
                    PDFRenderer renderer = new PDFRenderer(charDocument);
                    BufferedImage image = renderer.renderImageWithDPI(0, 96);
                    ImageIO.write(image, "PNG", result);
                    charDocument.save(new File(RESULT_FOLDER, String.format("4700198773-%s-%s.pdf", key.getName(), code)));
                }
            }
        }
    }
 
Example #7
Source File: RenderType3Character.java    From testarea-pdfbox2 with Apache License 2.0 4 votes vote down vote up
/**
 * <a href="http://stackoverflow.com/questions/42032729/render-type3-font-character-as-image-using-pdfbox">
 * Render Type3 font character as image using PDFBox
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/0B0f6X4SAMh2KRDJTbm4tb3E1a1U/view">
 * 4700198773.pdf
 * </a>
 * from
 * <a href="http://stackoverflow.com/questions/37754112/extract-text-with-custom-font-result-non-readble">
 * extract text with custom font result non readble
 * </a>
 * <p>
 * This test shows how one can render individual Type 3 font glyphs as bitmaps.
 * Unfortunately PDFBox out-of-the-box does not provide a class to render contents
 * of arbitrary XObjects, merely for rendering pages; thus, we simply create a page
 * with the glyph in question and render that page.
 * </p>
 * <p>
 * As the OP did not provide a sample PDF, we simply use one from another
 * stackoverflow question. There obviously might remain issues with the
 * OP's files.
 * </p>
 * <p>
 * For every char proc of font R144 on the second page of sdnlist.pdf that has a code in
 * the font's encoding, a one-page document is created, the glyph is shown on it, and the
 * page is written both as PNG and as PDF into {@code RESULT_FOLDER}.
 * </p>
 */
@Test
public void testRenderSdnList() throws IOException, IllegalAccessException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException, SecurityException
{
    // write(String) is looked up on the superclass of PDPageContentStream and made
    // accessible reflectively so that a raw show-text operator can be emitted
    Method writeMethod = PDPageContentStream.class.getSuperclass().getDeclaredMethod("write", String.class);
    writeMethod.setAccessible(true);

    // both the stream and the source document are closed even when rendering fails
    try (   InputStream resource = getClass().getResourceAsStream("sdnlist.pdf");
            PDDocument document = Loader.loadPDF(resource))
    {
        PDPage page = document.getPage(1);
        PDResources pageResources = page.getResources();
        COSName f1Name = COSName.getPDFName("R144");
        PDType3Font fontF1 = (PDType3Font) pageResources.getFont(f1Name);
        Map<String, Integer> f1NameToCode = fontF1.getEncoding().getNameToCodeMap();

        COSDictionary charProcsDictionary = fontF1.getCharProcs();
        for (COSName key : charProcsDictionary.keySet())
        {
            COSStream stream = (COSStream) charProcsDictionary.getDictionaryObject(key);
            PDType3CharProc charProc = new PDType3CharProc(fontF1, stream);
            PDRectangle bbox = charProc.getGlyphBBox();
            if (bbox == null)
            {
                bbox = charProc.getBBox();
            }
            Integer code = f1NameToCode.get(key.getName());

            // glyphs without a code in the encoding cannot be addressed by a text operator
            if (code == null)
            {
                continue;
            }

            try (PDDocument charDocument = new PDDocument())
            {
                PDPage charPage = new PDPage(bbox);
                charDocument.addPage(charPage);
                charPage.setResources(pageResources);

                // the content stream must be closed before the page is rendered or saved
                try (PDPageContentStream charContentStream = new PDPageContentStream(charDocument, charPage))
                {
                    charContentStream.beginText();
                    charContentStream.setFont(fontF1, bbox.getHeight());
                    // zero-padded to two hex digits: a space-padded single digit such as
                    // "< A>" would be read as <A0> because whitespace is ignored and a
                    // missing final digit is assumed to be 0
                    writeMethod.invoke(charContentStream, String.format("<%02X> Tj\n", code));
                    charContentStream.endText();
                }

                File result = new File(RESULT_FOLDER, String.format("sdnlist-%s-%s.png", key.getName(), code));
                PDFRenderer renderer = new PDFRenderer(charDocument);
                BufferedImage image = renderer.renderImageWithDPI(0, 96);
                ImageIO.write(image, "PNG", result);
                charDocument.save(new File(RESULT_FOLDER, String.format("sdnlist-%s-%s.pdf", key.getName(), code)));
            }
        }
    }
}
 
Example #8
Source File: PDFStreamEngine.java    From gcs with Mozilla Public License 2.0 3 votes vote down vote up
/**
 * Called when a glyph of a Type 3 font is to be processed. This method is intended for
 * overriding in subclasses. The default implementation looks up the char proc for the
 * given code and, if the font has one, hands it to {@code processType3Stream} together
 * with the text rendering matrix; otherwise nothing happens.
 *
 * @param textRenderingMatrix the current text rendering matrix, T<sub>rm</sub>
 * @param font the current Type 3 font
 * @param code internal PDF character code for the glyph
 * @param unicode the Unicode text for this glyph, or null if the PDF does not provide it
 * @param displacement the displacement (i.e. advance) of the glyph in text space
 * @throws IOException if the glyph cannot be processed
 */
protected void showType3Glyph(Matrix textRenderingMatrix, PDType3Font font, int code,
                              String unicode, Vector displacement) throws IOException
{
    PDType3CharProc glyphProc = font.getCharProc(code);
    if (glyphProc == null)
    {
        // no char proc for this code: nothing to process
        return;
    }
    processType3Stream(glyphProc, textRenderingMatrix);
}