org.apache.pdfbox.text.TextPosition Java Examples

The following examples show how to use org.apache.pdfbox.text.TextPosition. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TextSection.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * Renders a list of words as a single string.
 * <p>
 * The unicode of every text position of a word is concatenated, consecutive
 * words are separated by one space, and the result is NFKC normalized.
 * </p>
 *
 * @param words the words, each given as the list of its text positions
 * @return the normalized, space separated text
 */
String toString(List<List<TextPosition>> words)
{
    StringBuilder joined = new StringBuilder();
    // empty in front of the first word, a single space in front of every later one
    String separator = "";
    for (List<TextPosition> glyphs : words)
    {
        joined.append(separator);
        separator = " ";
        for (TextPosition glyph : glyphs)
        {
            joined.append(glyph.getUnicode());
        }
    }
    // cf. http://stackoverflow.com/a/7171932/1729265
    return Normalizer.normalize(joined, Form.NFKC);
}
 
Example #2
Source File: ExtractCharacterCodes.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/50664162/some-glyph-ids-missing-while-trying-to-extract-glyph-id-from-pdf">
 * Some glyph ID's missing while trying to extract glyph ID from pdf
 * </a>
 * <br/>
 * <a href="http://1drv.ms/b/s!AmHcFaD-gMGyhkHr4PY6F4krYJ32">
 * pattern3.pdf
 * </a>
 * <p>
 * This test shows how to access the character codes of the extracted text
 * while preventing the {@link PDFTextStripper} from doing any preprocessing
 * steps, in particular from doing any diacritics merges.
 * </p>
 *
 * @throws IOException if the document cannot be loaded or the result cannot be written
 */
@Test
public void testExtractFromPattern3() throws IOException {
    // The document is a resource of its own: declaring it in the try header
    // closes it (before the stream) even when extraction fails.
    try (   InputStream resource = getClass().getResourceAsStream("pattern3.pdf");
            PDDocument document = Loader.loadPDF(resource)    )
    {
        PDFTextStripper stripper = new PDFTextStripper() {

            // Writes each text position immediately, followed by its character
            // codes, instead of handing it to the regular processing of the
            // super class.
            @Override
            protected void processTextPosition(TextPosition textPosition) {
                try {
                    writeString(String.format("%s%s", textPosition.getUnicode(), Arrays.toString(textPosition.getCharacterCodes())));
                } catch (IOException e) {
                    // the overridden method cannot throw a checked exception
                    e.printStackTrace();
                }
            }
        };
        String text = stripper.getText(document);

        System.out.printf("\n*\n* pattern3.pdf\n*\n%s\n", text);
        Files.write(new File(RESULT_FOLDER, "pattern3.txt").toPath(), Collections.singleton(text));
    }
}
 
Example #3
Source File: ExtractWordCoordinates.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * Splits the given text positions into words at the word separator and
 * passes every non-empty word to {@code printWord}.
 * <p>
 * Positions without unicode (null or empty) are ignored; a position whose
 * unicode equals the word separator ends the current word.
 * </p>
 *
 * @param string        the text of the chunk (not used here)
 * @param textPositions the text positions making up the chunk
 */
@Override
protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
    final String separator = getWordSeparator();
    final List<TextPosition> pending = new ArrayList<>();
    for (TextPosition position : textPositions) {
        final String glyph = position.getUnicode();
        if (glyph == null || glyph.isEmpty()) {
            continue;
        }
        if (!glyph.equals(separator)) {
            pending.add(position);
            continue;
        }
        // separator reached: flush the word collected so far, if any
        if (!pending.isEmpty()) {
            printWord(pending);
            pending.clear();
        }
    }
    // flush the trailing word that was not followed by a separator
    if (!pending.isEmpty()) {
        printWord(pending);
        pending.clear();
    }
}
 
Example #4
Source File: RectanglesOverText.java    From testarea-pdfbox2 with Apache License 2.0 6 votes vote down vote up
/**
 * Records the written text in {@code lines} before delegating to the super
 * class: at the start of a line a new {@code TextLine} is created, otherwise
 * text and positions are appended to the most recent one.
 *
 * @param text          the text being written
 * @param textPositions the text positions belonging to the text
 */
@Override
protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
    if (!startOfLine) {
        // continue the line that is currently being assembled
        TextLine current = lines.get(lines.size() - 1);
        current.text += text;
        current.textPositions.addAll(textPositions);
    } else {
        // first chunk of a line: open a new entry and leave the start-of-line state
        TextLine fresh = new TextLine();
        fresh.text = text;
        fresh.textPositions = textPositions;
        lines.add(fresh);
        startOfLine = false;
    }
    super.writeString(text, textPositions);
}
 
Example #5
Source File: PDFLayoutTextStripper.java    From PDFLayoutTextStripper with Apache License 2.0 6 votes vote down vote up
/**
 * Walks over the text positions and groups them into lines: whenever a
 * position starts a new line, the positions collected so far are written
 * and the required number of empty lines is created.
 *
 * @param textIterator iterator over the text positions to process
 */
private void iterateThroughTextList(Iterator<TextPosition> textIterator) {
    final List<TextPosition> currentLine = new ArrayList<TextPosition>();

    while ( textIterator.hasNext() ) {
        final TextPosition current = textIterator.next();
        final int lineBreaks = this.getNumberOfNewLinesFromPreviousTextPosition(current);
        if ( lineBreaks != 0 ) {
            // the position opens a new line: emit what was collected before it
            this.writeTextPositionList(currentLine);
            this.createNewEmptyNewLines(lineBreaks);
        }
        currentLine.add(current);
        this.setPreviousTextPosition(current);
    }
    // emit whatever is left after the last position
    if ( !currentLine.isEmpty() ) {
        this.writeTextPositionList(currentLine);
    }
}
 
Example #6
Source File: PDFLayoutTextStripper.java    From PDFLayoutTextStripper with Apache License 2.0 6 votes vote down vote up
/**
 * Determines how many new lines separate the given text position from the
 * previous one.
 *
 * @param textPosition the text position about to be processed
 * @return 1 if there is no previous text position; 0 if the rounded Y
 *         position is not more than 5.5 below the previous one; otherwise
 *         the gap divided by the glyph height, minus one for the current
 *         new line, but at least 1
 */
private int getNumberOfNewLinesFromPreviousTextPosition(final TextPosition textPosition) {
    TextPosition previousTextPosition = this.getPreviousTextPosition();
    if ( previousTextPosition == null ) {
        return 1;
    }

    float textYPosition = Math.round( textPosition.getY() );
    float previousTextYPosition = Math.round( previousTextPosition.getY() );
    float verticalGap = textYPosition - previousTextYPosition;

    // a gap above 5.5 implies that the position lies below the previous one
    if ( verticalGap > 5.5 ) {
        double height = textPosition.getHeight();
        // A height of 0 would divide to Infinity, which converts to
        // Integer.MAX_VALUE and would request billions of empty lines;
        // treat such a glyph as a single line break instead.
        int numberOfLines = ( height > 0 ) ? (int) Math.floor( verticalGap / height ) : 1;
        numberOfLines = Math.max(1, numberOfLines - 1); // exclude current new line
        if (DEBUG) System.out.println(height + " " + numberOfLines);
        return numberOfLines ;
    } else {
        return 0;
    }
}
 
Example #7
Source File: PDFLayoutTextStripper.java    From quarkus-pdf-extract with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the number of new lines between the previous text position and
 * the given one.
 *
 * @param textPosition the text position about to be processed
 * @return 1 when no previous text position is known; 0 when the rounded Y
 *         distance to the previous position does not exceed 5.5; otherwise
 *         the distance divided by the glyph height, reduced by one for the
 *         current new line and never less than 1
 */
private int getNumberOfNewLinesFromPreviousTextPosition(final TextPosition textPosition) {
    TextPosition previousTextPosition = this.getPreviousTextPosition();
    if ( previousTextPosition == null ) {
        return 1;
    }

    float textYPosition = Math.round( textPosition.getY() );
    float previousTextYPosition = Math.round( previousTextPosition.getY() );
    float yDistance = textYPosition - previousTextYPosition;

    // a distance of 5.5 or less (including any negative one) is the same line
    if ( !(yDistance > 5.5) ) {
        return 0;
    }

    double height = textPosition.getHeight();
    int numberOfLines;
    if ( height > 0 ) {
        numberOfLines = (int) Math.floor( yDistance / height );
    } else {
        // Dividing by a zero height yields Infinity, and the int conversion of
        // Infinity is Integer.MAX_VALUE; fall back to a single line break.
        numberOfLines = 1;
    }
    numberOfLines = Math.max(1, numberOfLines - 1); // exclude current new line
    if (DEBUG) System.out.println(height + " " + numberOfLines);
    return numberOfLines ;
}
 
Example #8
Source File: PDFLayoutTextStripper.java    From quarkus-pdf-extract with Apache License 2.0 6 votes vote down vote up
/**
 * Consumes the iterator and splits its text positions into lines. Each time
 * a position is found to start a new line, the buffered positions are
 * written and the computed number of empty lines is created first.
 *
 * @param textIterator iterator over the text positions to process
 */
private void iterateThroughTextList(Iterator<TextPosition> textIterator) {
    final List<TextPosition> buffer = new ArrayList<TextPosition>();

    while ( textIterator.hasNext() ) {
        final TextPosition next = textIterator.next();
        final int newLines = this.getNumberOfNewLinesFromPreviousTextPosition(next);
        if ( newLines != 0 ) {
            // line change: write the buffered line, then the blank lines
            this.writeTextPositionList(buffer);
            this.createNewEmptyNewLines(newLines);
        }
        buffer.add(next);
        this.setPreviousTextPosition(next);
    }
    // the last line has no following line change that would write it
    if ( !buffer.isEmpty() ) {
        this.writeTextPositionList(buffer);
    }
}
 
Example #9
Source File: PDFLayoutTextStripper.java    From PDFLayoutTextStripper with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether the given text position is close to, but not directly
 * attached to, the previous one.
 *
 * @param textPosition the text position to examine
 * @return {@code false} before the first character of the line was found;
 *         otherwise whether the number of spaces to the previous position
 *         is greater than 1 and at most
 *         {@code OUTPUT_SPACE_CHARACTER_WIDTH_IN_PT}
 */
private boolean isCharacterCloseToPreviousWord(final TextPosition textPosition) {
    if ( firstCharacterOfLineFound ) {
        final double gap = this.numberOfSpacesBetweenTwoCharacters(previousTextPosition, textPosition);
        return gap > 1 && gap <= PDFLayoutTextStripper.OUTPUT_SPACE_CHARACTER_WIDTH_IN_PT;
    }
    return false;
}
 
Example #10
Source File: PDFLayoutTextStripper.java    From PDFLayoutTextStripper with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether the given text position continues the word of the previous
 * text position.
 *
 * @param textPosition the text position to examine
 * @return {@code false} if the previous position is a space; otherwise
 *         whether at most one space lies between the two positions
 */
private boolean isCharacterPartOfPreviousWord(final TextPosition textPosition) {
    final TextPosition previous = this.getPreviousTextPosition();
    final boolean previousIsSpace = previous.getUnicode().equals(" ");
    // the distance is only computed when the previous position is not a space
    return !previousIsSpace
            && this.numberOfSpacesBetweenTwoCharacters(previous, textPosition) <= 1;
}
 
Example #11
Source File: PDFLayoutTextStripper.java    From PDFLayoutTextStripper with Apache License 2.0 5 votes vote down vote up
/**
 * Measures the horizontal gap between two text positions.
 *
 * @param textPosition1 the first (left hand) text position
 * @param textPosition2 the second text position
 * @return the absolute value of the rounded distance between the end of
 *         the first position (its X plus its width) and the X of the second
 */
private double numberOfSpacesBetweenTwoCharacters(final TextPosition textPosition1, final TextPosition textPosition2) {
    // widen before adding so that the sum is computed in double precision
    final double endOfFirst = (double) textPosition1.getX() + (double) textPosition1.getWidth();
    final long roundedGap = Math.round(textPosition2.getX() - endOfFirst);
    return Math.abs(roundedGap);
}
 
Example #12
Source File: PDFLayoutTextStripper.java    From PDFLayoutTextStripper with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a new line and fills it with one character per given text position;
 * an empty list results in an empty (white) line.
 *
 * @param textPositionList the text positions of the line, possibly empty
 */
private void writeLine(final List<TextPosition> textPositionList) {
    if ( textPositionList.isEmpty() ) {
        this.addNewLine(); // white line
        return;
    }

    final TextLine line = this.addNewLine();
    boolean firstSeen = false;
    for (TextPosition position : textPositionList ) {
        // the factory is told whether a character of this line was already created
        final CharacterFactory factory = new CharacterFactory(firstSeen);
        final Character created = factory.createCharacterFromTextPosition(position, this.getPreviousTextPosition());
        line.writeCharacterAtIndex(created);
        this.setPreviousTextPosition(position);
        firstSeen = true;
    }
}
 
Example #13
Source File: TextMetrics.java    From Pdf2Dom with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Creates the metrics of a single text position.
 *
 * @param tp the text position supplying position, size and font data
 */
public TextMetrics(TextPosition tp)
{
    // font related values
    this.font = tp.getFont();
    this.pointSize = tp.getFontSizeInPt();
    this.fontSize = tp.getYScale();

    // position and extent
    this.x = tp.getX();
    this.baseline = tp.getY();
    this.width = tp.getWidth();
    this.height = tp.getHeight();

    // computed last, after all values taken from the text position are set
    this.ascent = getAscent();
    this.descent = getDescent();
}
 
Example #14
Source File: TextMetrics.java    From Pdf2Dom with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Extends these metrics by a further text position.
 *
 * @param tp the text position to add
 */
public void append(TextPosition tp)
{
    // add the gap between the current right edge (x + width) and the new
    // position plus the new position's own width
    width += tp.getX() - (x + width) + tp.getWidth();
    // keep the largest height seen so far
    height = Math.max(height, tp.getHeight());
    // keep the largest ascent and the smallest descent, each computed from
    // the font and Y scale of the new position
    ascent = Math.max(ascent, getAscent(tp.getFont(), tp.getYScale()));
    descent = Math.min(descent, getDescent(tp.getFont(), tp.getYScale()));
}
 
Example #15
Source File: VisualizeMarkedContent.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Prints the text content of the marked content with the given MCID and
 * determines its bounding box, recursing into nested marked content.
 *
 * @param mcid                the MCID to look up
 * @param theseMarkedContents the marked contents by MCID, may be {@code null}
 * @return the union of the glyph bounds found, or {@code null} if none was found
 */
Rectangle2D showContent(int mcid, Map<Integer, PDMarkedContent> theseMarkedContents) throws IOException {
    PDMarkedContent markedContent = null;
    if (theseMarkedContents != null) {
        markedContent = theseMarkedContents.get(mcid);
    }

    List<Object> contents;
    if (markedContent == null) {
        contents = Collections.emptyList();
    } else {
        contents = markedContent.getContents();
    }

    Rectangle2D bounds = null;
    StringBuilder textContent = new StringBuilder();
    for (Object entry : contents) {
        if (entry instanceof TextPosition) {
            TextPosition position = (TextPosition) entry;
            textContent.append(position.getUnicode());

            // bounds are only calculated for positions carrying exactly one code
            int[] codes = position.getCharacterCodes();
            if (codes.length == 1) {
                bounds = union(bounds, calculateGlyphBounds(position.getTextMatrix(), position.getFont(), codes[0]).getBounds2D());
            } else {
                System.out.printf("<!-- text position with unexpected number of codes: %d -->", codes.length);
            }
        } else if (entry instanceof PDMarkedContent) {
            // nested marked content: print it and merge its box
            PDMarkedContent nested = (PDMarkedContent) entry;
            bounds = union(bounds, showContent(nested.getMCID(), theseMarkedContents));
        } else {
            textContent.append('?').append(entry);
        }
    }
    System.out.printf("%s\n", textContent);
    return bounds;
}
 
Example #16
Source File: TextStripper.java    From tabula-java with MIT License 5 votes vote down vote up
/**
 * Turns every printable text position into a {@code TextElement}, adds it
 * to the spatial index and to the list of text elements, and keeps track of
 * the smallest character width and height encountered.
 *
 * @param string        the text of the chunk (not used here)
 * @param textPositions the text positions of the chunk; {@code null} entries are skipped
 */
@Override
protected void writeString(String string, List<TextPosition> textPositions) throws IOException
{
    for (TextPosition position : textPositions)
    {
        if (position == null) {
            continue;
        }

        String unicode = position.getUnicode();

        // anything not printable is skipped; the remaining positions are still processed
        if (!isPrintable(unicode)) {
            continue;
        }

        float heightDir = position.getHeightDir();

        // replace non-breaking space for space
        String glyph = unicode.equals(NBSP) ? " " : unicode;

        // workaround a possible bug in PDFBox:
        // https://issues.apache.org/jira/browse/PDFBOX-1755
        float widthOfSpace = position.getWidthOfSpace();

        TextElement element = new TextElement(
                Utils.round(position.getYDirAdj() - heightDir, 2),
                Utils.round(position.getXDirAdj(), 2),
                Utils.round(position.getWidthDirAdj(), 2),
                Utils.round(position.getHeightDir(), 2),
                position.getFont(),
                position.getFontSize(),
                glyph,
                widthOfSpace,
                position.getDir());

        this.minCharWidth = (float) Math.min(this.minCharWidth, element.getWidth());
        this.minCharHeight = (float) Math.min(this.minCharHeight, element.getHeight());

        this.spatialIndex.add(element);
        this.textElements.add(element);
    }
}
 
Example #17
Source File: PDFLayoutTextStripper.java    From PDFLayoutTextStripper with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether the given text position starts a word.
 *
 * @param textPosition the text position to examine
 * @return {@code true} before the first character of the line was found;
 *         otherwise whether more than one space lies between the previous
 *         and the given position or the position is at the beginning of a
 *         new line
 */
private boolean isFirstCharacterOfAWord(final TextPosition textPosition) {
    if ( firstCharacterOfLineFound ) {
        final double gap = this.numberOfSpacesBetweenTwoCharacters(previousTextPosition, textPosition);
        if ( gap > 1 ) {
            return true;
        }
        return this.isCharacterAtTheBeginningOfNewLine(textPosition);
    }
    return true;
}
 
Example #18
Source File: PDFLayoutTextStripper.java    From PDFLayoutTextStripper with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the current page: the text positions of each article are sorted
 * and processed, then the collected text lines are written to the output.
 */
@Override
protected void writePage() throws IOException {
    final List<List<TextPosition>> articles = super.getCharactersByArticle();
    for ( List<TextPosition> article : articles ) {
        try {
            this.sortTextPositionList(article);
        } catch ( java.lang.IllegalArgumentException e) {
            // a failed sort is only reported; the article is processed as it is
            System.err.println(e);
        }
        this.iterateThroughTextList(article.iterator()) ;
    }
    this.writeToOutputStream(this.getTextLineList());
}
 
Example #19
Source File: ExtractMarkedContent.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Prints the text content of the marked content with the given MCID;
 * entries that are not text positions are printed with a leading '?'.
 *
 * @param mcid                the MCID to look up
 * @param theseMarkedContents the marked contents by MCID, may be {@code null}
 * @see #showStructure(PDStructureNode, Map)
 * @see #testExtractTestWPhromma()
 */
void showContent(int mcid, Map<Integer, PDMarkedContent> theseMarkedContents) {
    PDMarkedContent markedContent = null;
    if (theseMarkedContents != null) {
        markedContent = theseMarkedContents.get(mcid);
    }

    List<Object> contents;
    if (markedContent == null) {
        contents = Collections.emptyList();
    } else {
        contents = markedContent.getContents();
    }

    StringBuilder textContent = new StringBuilder();
    for (Object entry : contents) {
        if (!(entry instanceof TextPosition)) {
            textContent.append('?').append(entry);
            continue;
        }
        textContent.append(((TextPosition) entry).getUnicode());
    }
    System.out.printf("%s\n", textContent);
}
 
Example #20
Source File: SearchSubword.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Finds all occurrences of the search term, overlapping ones included, in
 * the text chunks of one page.
 *
 * @param document   the document to search
 * @param page       the number of the page to search
 * @param searchTerm the text to look for; an empty term yields no hits
 * @return the text position sequences matching the search term
 * @throws IOException if the text extraction fails
 */
List<TextPositionSequence> findSubwords(PDDocument document, int page, String searchTerm) throws IOException
{
    final List<TextPositionSequence> hits = new ArrayList<TextPositionSequence>();

    // String.indexOf("", fromIndex) never returns -1, so an empty term would
    // keep the matching loop below adding hits forever.
    if (searchTerm.isEmpty())
    {
        return hits;
    }

    PDFTextStripper stripper = new PDFTextStripper()
    {
        @Override
        protected void writeString(String text, List<TextPosition> textPositions) throws IOException
        {
            System.out.printf("  -- %s\n", text);

            TextPositionSequence word = new TextPositionSequence(textPositions);
            String string = word.toString();

            int fromIndex = 0;
            int index;
            while ((index = string.indexOf(searchTerm, fromIndex)) > -1)
            {
                hits.add(word.subSequence(index, index + searchTerm.length()));
                // advance by one only, so that overlapping matches are found too
                fromIndex = index + 1;
            }
            super.writeString(text, textPositions);
        }
    };

    // restrict the extraction to the requested page
    stripper.setSortByPosition(true);
    stripper.setStartPage(page);
    stripper.setEndPage(page);
    stripper.getText(document);
    return hits;
}
 
Example #21
Source File: SearchSubword.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Prints, page by page, every occurrence of the search term together with
 * its position, its width and the position of its last letter. Hits whose
 * text differs from the search term are additionally flagged as invalid.
 *
 * @param document   the document to search
 * @param searchTerm the text to look for
 * @throws IOException if the text extraction fails
 */
void printSubwords(PDDocument document, String searchTerm) throws IOException
{
    System.out.printf("* Looking for '%s'\n", searchTerm);
    int page = 1;
    while (page <= document.getNumberOfPages())
    {
        for (TextPositionSequence hit : findSubwords(document, page, searchTerm))
        {
            if (!searchTerm.equals(hit.toString()))
            {
                System.out.printf("  Invalid (%s) ", hit.toString());
            }
            TextPosition last = hit.textPositionAt(hit.length() - 1);
            System.out.printf("  Page %s at %s, %s with width %s and last letter '%s' at %s, %s\n",
                    page, hit.getX(), hit.getY(), hit.getWidth(),
                    last.getUnicode(), last.getXDirAdj(), last.getYDirAdj());
        }
        page++;
    }
}
 
Example #22
Source File: ExtractText.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * <a href="https://stackoverflow.com/questions/51672080/pdfdomtree-does-not-detecting-white-spaces-while-converting-a-pdf-file-to-html">
 * PDFDomTree does not detecting white spaces while converting a pdf file to html
 * </a>
 * <br/>
 * <a href="https://drive.google.com/file/d/1SZNFCvGVbQzCxJiRr8HlW99ravC_Cm71/view?usp=sharing">
 * demo.pdf
 * </a>
 * <p>
 * This improved version does not ignore white space glyphs but
 * instead translates them into gaps. This is a work-around and
 * not a fix, different kinds of white spaces need to be handled
 * differently.
 * </p>
 * @see #testDemo()
 */
@Test
public void testDemoImproved() throws IOException, ParserConfigurationException
{
    System.out.printf("\n*\n* demo.pdf improved\n*\n");
    // The document is declared as a resource so that it is closed even if the
    // conversion fails.
    try (   InputStream resource = getClass().getResourceAsStream("/mkl/testarea/pdfbox2/extract/demo.pdf");
            PDDocument document = Loader.loadPDF(resource)    ) {

        PDFDomTree parser = new PDFDomTree(PDFDomTreeConfig.createDefaultConfig()) {
            @Override
            protected void processTextPosition(TextPosition text) {
                if (text.getUnicode().trim().isEmpty()) {
                    //finish current box (if any)
                    if (lastText != null)
                    {
                        finishBox();
                    }
                    //start a new box
                    curstyle = new BoxStyle(style);
                    lastText = null;
                } else {
                    super.processTextPosition(text);
                }
            }
        };

        // The writer is a resource as well: it is closed (and thereby flushed)
        // no matter whether writeText succeeds.
        try (   Writer output = new PrintWriter(new File(RESULT_FOLDER, "demo-improved.html"), "utf-8")    ) {
            parser.writeText(document, output);
        }
    }
}
 
Example #23
Source File: PDFVisibleTextStripper.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Removes from every article the text positions whose start point (and,
 * if {@code checkEndPointToo} is set, alternatively whose end point) lies
 * inside {@code linePath}. The number of removed positions is printed for
 * each article that loses any.
 */
void deleteCharsInPath() {
    for (List<TextPosition> article : charactersByArticle) {
        List<TextPosition> covered = new ArrayList<>();
        for (TextPosition text : article) {
            Vector start = text.getTextMatrix().transform(new Vector(0, 0));
            Vector end = new Vector(start.getX() + text.getWidth(), start.getY());

            boolean inPath = linePath.contains(lowerLeftX + start.getX(), lowerLeftY + start.getY());
            if (!inPath && checkEndPointToo) {
                // the end point is only consulted when the start point is outside
                inPath = linePath.contains(lowerLeftX + end.getX(), lowerLeftY + end.getY());
            }
            if (inPath) {
                covered.add(text);
            }
        }
        if (!covered.isEmpty()) {
            System.out.println(covered.size());
            article.removeAll(covered);
        }
    }
}
 
Example #24
Source File: PDFVisibleTextStripper.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Forwards the text position to the super class only if it is not clipped:
 * without a clipping path everything passes, otherwise the start point
 * (and, if {@code checkEndPointToo} is set, also the end point) must lie
 * inside the clipping path. Clipped positions are reported to
 * {@code dropStream} when one is set.
 *
 * @param text the text position to process
 */
@Override
protected void processTextPosition(TextPosition text) {
    Matrix textMatrix = text.getTextMatrix();
    Vector start = textMatrix.transform(new Vector(0, 0));
    Vector end = new Vector(start.getX() + text.getWidth(), start.getY());

    PDGraphicsState gs = getGraphicsState();
    Area area = gs.getCurrentClippingPath();

    boolean visible;
    if (area == null) {
        visible = true;
    } else if (!contains(area, lowerLeftX + start.getX(), lowerLeftY + start.getY())) {
        visible = false;
    } else {
        visible = (!checkEndPointToo) || contains(area, lowerLeftX + end.getX(), lowerLeftY + end.getY());
    }

    if (visible) {
        super.processTextPosition(text);
    } else if (dropStream != null) {
        dropStream.printf("Clipped '%s' at %s,%s\n", text.getUnicode(), lowerLeftX + start.getX(), lowerLeftY + start.getY());
    }
}
 
Example #25
Source File: TextSection.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Renders the section as text: the definition name followed by ": " and
 * the header (if not empty) on the first line, then one line per body
 * line, each indented by four spaces.
 *
 * @return the textual representation of this section
 */
@Override
public String toString()
{
    StringBuilder result = new StringBuilder();
    result.append(definition.name);
    result.append(": ");
    if (!header.isEmpty())
    {
        result.append(toString(header));
    }
    result.append('\n');
    for (List<List<TextPosition>> line : body)
    {
        result.append("    ");
        result.append(toString(line));
        result.append('\n');
    }
    return result.toString();
}
 
Example #26
Source File: PDFLayoutTextStripper.java    From quarkus-pdf-extract with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the current page. For each article the text positions are sorted
 * and then processed; finally the collected text lines go to the output.
 */
@Override
protected void writePage() throws IOException {
    final List<List<TextPosition>> byArticle = super.getCharactersByArticle();
    for ( List<TextPosition> positions : byArticle ) {
        try {
            this.sortTextPositionList(positions);
        } catch ( java.lang.IllegalArgumentException e) {
            // sorting problems are reported but do not stop the page from being written
            System.err.println(e);
        }
        this.iterateThroughTextList(positions.iterator()) ;
    }
    this.writeToOutputStream(this.getTextLineList());
}
 
Example #27
Source File: ColorTextStripper.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Writes every text position as its unicode followed by a brace enclosed
 * description of how it is rendered: {@code FILL:} with the non-stroking
 * color, {@code STROKE:} with the stroking color and {@code CLIP;},
 * depending on the sets the recorded rendering mode belongs to.
 *
 * @param text          the text of the chunk (not used here)
 * @param textPositions the text positions of the chunk
 */
@Override
protected void writeString(String text, List<TextPosition> textPositions) throws IOException
{
    for (TextPosition position : textPositions)
    {
        // values recorded for this position
        RenderingMode mode = renderingMode.get(position);
        float[] stroke = strokingColor.get(position);
        float[] fill = nonStrokingColor.get(position);

        StringBuilder annotated = new StringBuilder();
        annotated.append(position.getUnicode());
        annotated.append("{");

        if (FILLING_MODES.contains(mode))
        {
            annotated.append("FILL:");
            annotated.append(toString(fill));
            annotated.append(';');
        }

        if (STROKING_MODES.contains(mode))
        {
            annotated.append("STROKE:");
            annotated.append(toString(stroke));
            annotated.append(';');
        }

        if (CLIPPING_MODES.contains(mode))
        {
            annotated.append("CLIP;");
        }

        annotated.append("}");
        writeString(annotated.toString());
    }
}
 
Example #28
Source File: ColorTextStripper.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Records the rendering mode and the stroking and non-stroking color
 * components of the current graphics state for the given text position,
 * then passes the position on to the super class.
 *
 * @param text the text position being processed
 */
@Override
protected void processTextPosition(TextPosition text)
{
    // remember the graphics state values under the text position as key
    renderingMode.put(text, getGraphicsState().getTextState().getRenderingMode());
    strokingColor.put(text, getGraphicsState().getStrokingColor().getComponents());
    nonStrokingColor.put(text, getGraphicsState().getNonStrokingColor().getComponents());

    super.processTextPosition(text);
}
 
Example #29
Source File: PdfToTextInfoConverter.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Logs every text position whose character color coincides with its
 * background color; two missing ({@code null}) colors count as coinciding.
 * Nothing is passed on to the super class.
 *
 * @param string        the text of the chunk (not used here)
 * @param textPositions the text positions of the chunk
 */
@Override
protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
    for (TextPosition text : textPositions) {
        Integer foreground = getCharacterColor(text);
        Integer background = getCharacterBackgroundColor(text);

        // null-safe equality of the two boxed colors
        boolean coincide = (foreground == null)
                ? background == null
                : foreground.equals(background);
        if (!coincide) {
            continue;
        }
        logger.info(String.format("Color and background coincide for '%s' at %3.2f, %3.2f : %h", text.getUnicode(), text.getX(), text.getY(), foreground));
    }
}
 
Example #30
Source File: PdfToTextInfoConverter.java    From testarea-pdfbox2 with Apache License 2.0 5 votes vote down vote up
/**
 * Processes the text position unless it is invisible: positions drawn with
 * both alpha constants below {@code Constants.EPSILON} are dropped, as are
 * positions whose center lies outside the current clipping path. For the
 * remaining ones the non-stroking color is recorded before the position is
 * passed on to the super class.
 *
 * @param text the text position being processed
 */
@Override
protected void processTextPosition(TextPosition text) {
    PDGraphicsState gs = getGraphicsState();
    // check opacity for stroke and fill text
    boolean transparent = gs.getAlphaConstant() < Constants.EPSILON
            && gs.getNonStrokeAlphaConstant() < Constants.EPSILON;
    if (!transparent) {
        Vector center = getTextPositionCenterPoint(text);
        Area area = gs.getCurrentClippingPath();
        // no clipping path means nothing is clipped
        boolean insideClip = area == null
                || area.contains(lowerLeftX + center.getX(), lowerLeftY + center.getY());
        if (insideClip) {
            nonStrokingColors.put(text, gs.getNonStrokingColor());
            super.processTextPosition(text);
        }
    }
}