java source code of PDFTextStripper

gcs-master
- .github
  - FUNDING.yml
- bundler
  - bundler
    - Bundler.java
- third_party
  - com.lowagie.text
    - src
      - com
        lowagie
        text
        xml
        XmlParser.java
        XmlDomWriter.java
        XmlPeer.java
        SAXiTextHandler.java
        xmp
        XmpArray.java
        XmpSchema.java
        DublinCoreSchema.java
        XmpMMSchema.java
        XmpBasicSchema.java
        PdfSchema.java
        XmpReader.java
        LangAlt.java
        PdfA1Schema.java
        XmpWriter.java
        SAXmyHandler.java
        TagMap.java
        simpleparser
        EntitiesToUnicode.java
        SimpleXMLDocHandler.java
        EntitiesToSymbol.java
        SimpleXMLParser.java
        SimpleXMLDocHandlerComment.java
        IanaEncodings.java
        Rectangle.java
        pdf
        PdfGState.java
        GlyphList.java
        PdfPageEvent.java
        ByteBuffer.java
        ArabicLigaturizer.java
        PdfStamperImp.java
        PdfTextArray.java
        BarcodeDatamatrix.java
        PdfOCProperties.java
        PdfPSXObject.java
        draw
        VerticalPositionMark.java
        DrawInterface.java
        DottedLineSeparator.java
        LineSeparator.java
        PdfBorderDictionary.java
        MappedRandomAccessFile.java
        DefaultSplitCharacter.java
        PdfPTableEvent.java
        PdfArray.java
        TrueTypeFontUnicode.java
        PdfStamper.java
        PdfPages.java
        PdfLayerMembership.java
        crypto
        ARCFOUREncryption.java
        IVGenerator.java
        AESCipher.java
        PdfPage.java
        BarcodePostnet.java
        ColorDetails.java
        PdfFormXObject.java
        PdfTransparencyGroup.java
        PdfResources.java
        PdfRectangle.java
        PdfGraphics2D.java
        FontDetails.java
        TextField.java
        PdfAcroForm.java
        PatternColor.java
        codec
        BmpImage.java
        CCITTG4Encoder.java
        Base64.java
        GifImage.java
        TIFFFaxDecoder.java
        TIFFConstants.java
        TiffImage.java
        PngImage.java
        JBIG2Image.java
        TIFFLZWDecoder.java
        wmf
        MetaBrush.java
        MetaObject.java
        MetaPen.java
        InputMeta.java
        MetaState.java
        MetaDo.java
        MetaFont.java
        JBIG2SegmentReader.java
        TIFFField.java
        TIFFDirectory.java
        BarcodeEANSUPP.java
        OcspClient.java
        PdfMediaClipData.java
        PdfAppearance.java
        PdfObject.java
        PdfEFStream.java
        ExtraEncoding.java
        PdfDocument.java
        PushbuttonField.java
        TrueTypeFontSubSet.java
        CMYKColor.java
        FdfWriter.java
        PdfContentParser.java
        PdfTransition.java
        RandomAccessFileOrArray.java
        BarcodePDF417.java
        PdfDestination.java
        PdfPRow.java
        BadPdfFormatException.java
        PdfColor.java
        PdfCopyForms.java
        FontSelector.java
        Barcode128.java
        CFFFont.java
        PdfFormField.java
        PdfNameTree.java
        ExtendedColor.java
        PdfCopyFields.java
        PdfCell.java
        hyphenation
        Hyphen.java
        ByteVector.java
        HyphenationException.java
        Hyphenator.java
        TernaryTree.java
        Hyphenation.java
        HyphenationTree.java
        SimplePatternParser.java
        PatternConsumer.java
        CharVector.java
        PdfPatternPainter.java
        PdfReaderInstance.java
        Type3Glyph.java
        FdfReader.java
        PdfDictionary.java
        DocumentFont.java
        PdfEncodings.java
        CMapAwareDocumentFont.java
        LZWDecoder.java
        PRAcroForm.java
        PdfPageEventHelper.java
        PdfAction.java
        PdfCopyFormsImp.java
        PdfString.java
        PdfName.java
        HyphenationEvent.java
        CFFFontSubset.java
        CJKFont.java
        PRIndirectReference.java
        ColumnText.java
        PdfOutline.java
        PdfOCG.java
        ShadingColor.java
        PdfIndirectReference.java
        PRTokeniser.java
        PdfPTable.java
        BarcodeInter25.java
        PdfNumber.java
        AcroFields.java
        PdfLayer.java
        PdfRendition.java
        PdfFunction.java
        collection
        PdfCollectionSchema.java
        PdfCollection.java
        PdfCollectionSort.java
        PdfCollectionItem.java
        PdfTargetDictionary.java
        PdfCollectionField.java
        BidiOrder.java
        PdfNumberTree.java
        PdfPublicKeyRecipient.java
        PdfImage.java
        PdfLine.java
        PdfAnnotation.java
        internal
        PdfXConformanceImp.java
        PdfAnnotationsImp.java
        PdfVersionImp.java
        PdfViewerPreferencesImp.java
        PolylineShapeIterator.java
        PolylineShape.java
        Pfm2afm.java
        PdfFont.java
        PdfTable.java
        SimpleBookmark.java
        BaseFont.java
        PdfShading.java
        PdfPrinterGraphics2D.java
        OutputStreamCounter.java
        fonts
        cmaps
        CMap.java
        CodespaceRange.java
        CMapParser.java
        FontsResourceAnchor.java
        VerticalText.java
        PdfFileSpecification.java
        SequenceList.java
        IntHashtable.java
        GrayColor.java
        PdfStructureElement.java
        XfaForm.java
        PdfPCellEvent.java
        XfdfReader.java
        PdfContentByte.java
        PdfSmartCopy.java
        PdfException.java
        PdfICCBased.java
        PdfWriter.java
        HyphenationAuto.java
        PdfPageLabels.java
        Barcode.java
        PdfStream.java
        PdfTemplate.java
        PdfNull.java
        PdfShadingPattern.java
        PdfSpotColor.java
        PdfLiteral.java
        PdfImportedPage.java
        interfaces
        PdfVersion.java
        PdfPageActions.java
        PdfDocumentActions.java
        PdfRunDirection.java
        PdfViewerPreferences.java
        PdfAnnotations.java
        PdfXConformance.java
        PdfContents.java
        Barcode39.java
        OutputStreamEncryption.java
        SimpleNamedDestination.java
        PdfCopyFieldsImp.java
        EnumerateTTC.java
        PdfPCell.java
        PageResources.java
        BarcodeEAN.java
        PdfDate.java
        BidiLine.java
        FontMapper.java
        BarcodeCodabar.java
        PdfDeveloperExtension.java
        PdfBoolean.java
        PdfCopy.java
        PdfStructureTreeRoot.java
        Type3Font.java
        PdfXConformanceException.java
        StampContent.java
        DefaultFontMapper.java
        PdfBorderArray.java
        PdfDashPattern.java
        PdfIndirectObject.java
        MultiColumnText.java
        AsianFontMapper.java
        PdfPattern.java
        PdfChunk.java
        parser
        SimpleTextExtractingPdfContentStreamProcessor.java
        ContentOperator.java
        PdfContentStreamProcessor.java
        Matrix.java
        GraphicsState.java
        PdfContentReaderTool.java
        PdfTextExtractor.java
        PRStream.java
        Type1Font.java
        PdfReader.java
        SpotColor.java
        BaseField.java
        PdfEncryption.java
        PdfLister.java
        StandardDecryption.java
        events
        FieldPositioningEvents.java
        PdfPTableEventForwarder.java
        IndexEvents.java
        PdfPageEventForwarder.java
        PdfPCellEventForwarder.java
        TrueTypeFont.java
        RadioCheckField.java
        PdfPageElement.java
        ElementTags.java
        ImgCCITT.java
        SplitCharacter.java
        GreekList.java
        RectangleReadOnly.java
        ImgTemplate.java
        Header.java
        Image.java
        MarkedSection.java
        PageSize.java
        LargeElement.java
        html
        WebColors.java
        HtmlTagMap.java
        SAXmyHtmlHandler.java
        Markup.java
        HtmlTags.java
        HtmlPeer.java
        HtmlParser.java
        HtmlWriter.java
        HtmlEncoder.java
        simpleparser
        IncTable.java
        ImageProvider.java
        IncCell.java
        HTMLWorker.java
        ChainedProperties.java
        StyleSheet.java
        Img.java
        FactoryProperties.java
        ALink.java
        Phrase.java
        Jpeg2000.java
        Chapter.java
        BadElementException.java
        ElementListener.java
        HeaderFooter.java
        FontFactoryImp.java
        Utilities.java
        DocumentException.java
        Cell.java
        Annotation.java
        ZapfDingbatsList.java
        RomanList.java
        SpecialSymbol.java
        Row.java
        ListItem.java
        ImgWMF.java
        Anchor.java
        MarkedObject.java
        Section.java
        ImgJBIG2.java
        Element.java
        SimpleTable.java
        Chunk.java
        ZapfDingbatsNumberList.java
        DocListener.java
        Document.java
        exceptions
        IllegalPdfSyntaxException.java
        UnsupportedPdfException.java
        BadPasswordException.java
        InvalidPdfException.java
        TextElementArray.java
        ChapterAutoNumber.java
        Font.java
        Meta.java
        Paragraph.java
        Table.java
        FontFactory.java
        List.java
        ImgRaw.java
        DocWriter.java
        factories
        GreekAlphabetFactory.java
        RomanNumberFactory.java
        RomanAlphabetFactory.java
        ElementFactory.java
        Jpeg.java
        ExceptionConverter.java
        SimpleCell.java
        RtfElementInterface.java
      - module-info.java
    - resources
      - com
        lowagie
        text
        pdf
        fonts
        Helvetica-BoldOblique.afm
        Times-Roman.afm
        glyphlist.txt
        Courier-BoldOblique.afm
        Helvetica.afm
        mustRead.html
        Times-Italic.afm
        Helvetica-Bold.afm
        Times-BoldItalic.afm
        cmap_info.txt
        Symbol.afm
        Courier.afm
        Helvetica-Oblique.afm
        Courier-Bold.afm
        ZapfDingbats.afm
        Courier-Oblique.afm
        Times-Bold.afm
    - com.lowagie.text.iml
    - README.md
  - org.apache.pdfbox
    - src
      - org
        apache
        pdfbox
        rendering
        SoftMask.java
        RenderDestination.java
        PDFRenderer.java
        TilingPaintFactory.java
        ImageType.java
        Glyph2D.java
        Type1Glyph2D.java
        CIDType0Glyph2D.java
        PageDrawerParameters.java
        TilingPaint.java
        TTFGlyph2D.java
        GroupGraphics.java
        PageDrawer.java
        cos
        COSInteger.java
        COSDocument.java
        COSUpdateInfo.java
        COSArray.java
        COSBase.java
        UnmodifiableCOSDictionary.java
        COSInputStream.java
        COSStream.java
        PDFDocEncoding.java
        COSNull.java
        COSNumber.java
        COSDictionary.java
        COSString.java
        COSObjectKey.java
        COSBoolean.java
        COSObject.java
        COSFloat.java
        COSOutputStream.java
        package.html
        COSName.java
        ICOSVisitor.java
        pdfparser
        PDFStreamParser.java
        SequentialSource.java
        COSParser.java
        InputStreamSource.java
        PDFParser.java
        PDFXRef.java
        EndstreamOutputStream.java
        FDFParser.java
        BaseParser.java
        XrefTrailerResolver.java
        PDFXrefStreamParser.java
        PDFObjectStreamParser.java
        RandomAccessSource.java
        package.html
        PDFXRefStream.java
        util
        NumberFormatUtil.java
        Charsets.java
        Hex.java
        Matrix.java
        Vector.java
        Version.java
        XMLUtil.java
        QuickSort.java
        DateConverter.java
        package.html
        filetypedetector
        FileType.java
        ByteTrie.java
        FileTypeDetector.java
        multipdf
        PDFCloneUtility.java
        LayerUtility.java
        PDFMergerUtility.java
        Splitter.java
        PageExtractor.java
        Overlay.java
        pdfwriter
        COSWriterXRefEntry.java
        ContentStreamWriter.java
        COSStandardOutputStream.java
        COSWriter.java
        package.html
        contentstream
        PDFStreamEngine.java
        PDFGraphicsStreamEngine.java
        operator
        state
        SetLineCapStyle.java
        Restore.java
        SetLineMiterLimit.java
        SetLineJoinStyle.java
        SetRenderingIntent.java
        Save.java
        SetGraphicsStateParameters.java
        SetLineWidth.java
        SetMatrix.java
        EmptyGraphicsStackException.java
        SetLineDashPattern.java
        Concatenate.java
        SetFlatness.java
        OperatorProcessor.java
        graphics
        FillEvenOddRule.java
        ShadingFill.java
        CloseAndStrokePath.java
        FillNonZeroRule.java
        AppendRectangleToPath.java
        CurveToReplicateFinalPoint.java
        FillEvenOddAndStrokePath.java
        EndPath.java
        StrokePath.java
        ClipNonZeroRule.java
        CurveToReplicateInitialPoint.java
        MoveTo.java
        LegacyFillNonZeroRule.java
        CurveTo.java
        BeginInlineImage.java
        CloseFillNonZeroAndStrokePath.java
        ClipEvenOddRule.java
        DrawObject.java
        FillNonZeroAndStrokePath.java
        GraphicsOperatorProcessor.java
        package.html
        ClosePath.java
        LineTo.java
        CloseFillEvenOddAndStrokePath.java
        markedcontent
        BeginMarkedContentSequence.java
        BeginMarkedContentSequenceWithProperties.java
        EndMarkedContentSequence.java
        DrawObject.java
        Operator.java
        color
        SetStrokingDeviceRGBColor.java
        SetStrokingDeviceCMYKColor.java
        SetNonStrokingColorN.java
        SetStrokingColorSpace.java
        SetColor.java
        SetNonStrokingDeviceCMYKColor.java
        SetNonStrokingDeviceRGBColor.java
        SetStrokingDeviceGrayColor.java
        SetStrokingColorN.java
        SetStrokingColor.java
        SetNonStrokingColor.java
        SetNonStrokingColorSpace.java
        SetNonStrokingDeviceGrayColor.java
        text
        SetTextHorizontalScaling.java
        ShowText.java
        SetWordSpacing.java
        SetCharSpacing.java
        EndText.java
        ShowTextLine.java
        NextLine.java
        SetFontAndSize.java
        SetTextRenderingMode.java
        SetTextRise.java
        BeginText.java
        ShowTextAdjusted.java
        MoveText.java
        SetTextLeading.java
        MoveTextSetLeading.java
        ShowTextLineAndSpace.java
        OperatorName.java
        MissingOperandException.java
        DrawObject.java
        package.html
        PDContentStream.java
        package.html
        io
        RandomAccessFile.java
        RandomAccessBuffer.java
        RandomAccessRead.java
        RandomAccess.java
        RandomAccessBufferedFileInputStream.java
        ScratchFileBuffer.java
        MemoryUsageSetting.java
        RandomAccessInputStream.java
        IOUtils.java
        RandomAccessWrite.java
        SequentialRead.java
        RandomAccessOutputStream.java
        package.html
        ScratchFile.java
        text
        LegacyPDFStreamEngine.java
        TextPosition.java
        PDFTextStripperByArea.java
        PDFMarkedContentExtractor.java
        TextPositionComparator.java
        PDFTextStripper.java
        pdmodel
        PDPageTree.java
        PDDocumentNameDestinationDictionary.java
        PDAbstractContentStream.java
        PDPageContentStream.java
        fdf
        FDFDictionary.java
        FDFDocument.java
        FDFAnnotationHighlight.java
        FDFAnnotation.java
        FDFAnnotationSound.java
        FDFAnnotationStrikeOut.java
        FDFNamedPageReference.java
        FDFAnnotationCircle.java
        FDFAnnotationSquiggly.java
        FDFAnnotationCaret.java
        FDFAnnotationSquare.java
        FDFAnnotationUnderline.java
        FDFAnnotationStamp.java
        FDFTemplate.java
        FDFField.java
        FDFIconFit.java
        FDFAnnotationFileAttachment.java
        FDFPageInfo.java
        FDFPage.java
        FDFAnnotationPolygon.java
        FDFAnnotationPolyline.java
        FDFAnnotationInk.java
        FDFCatalog.java
        FDFAnnotationLine.java
        FDFJavaScript.java
        XMLUtil.java
        FDFAnnotationTextMarkup.java
        FDFAnnotationText.java
        FDFAnnotationFreeText.java
        FDFAnnotationLink.java
        FDFOptionElement.java
        package.html
        PageLayout.java
        DefaultResourceCache.java
        PDDocumentCatalog.java
        PDDestinationNameTreeNode.java
        PDEmbeddedFilesNameTreeNode.java
        PDAppearanceContentStream.java
        common
        PDDestinationOrAction.java
        PDTypedDictionaryWrapper.java
        PDPageLabels.java
        PDPageLabelRange.java
        function
        PDFunctionType4.java
        PDFunctionType2.java
        PDFunction.java
        PDFunctionType0.java
        PDFunctionType3.java
        PDFunctionTypeIdentity.java
        type4
        Parser.java
        BitwiseOperators.java
        Operators.java
        RelationalOperators.java
        InstructionSequenceBuilder.java
        StackOperators.java
        Operator.java
        ExecutionContext.java
        ArithmeticOperators.java
        InstructionSequence.java
        package.html
        ConditionalOperators.java
        package.html
        PDStream.java
        COSArrayList.java
        PDNumberTreeNode.java
        PDNameTreeNode.java
        PDObjectStream.java
        filespecification
        PDFileSpecification.java
        PDComplexFileSpecification.java
        PDEmbeddedFile.java
        PDSimpleFileSpecification.java
        package.html
        PDDictionaryWrapper.java
        PDRange.java
        PDMetadata.java
        COSObjectable.java
        COSDictionaryMap.java
        PDRectangle.java
        package.html
        graphics
        optionalcontent
        PDOptionalContentProperties.java
        PDOptionalContentGroup.java
        PDOptionalContentMembershipDictionary.java
        blend
        BlendComposite.java
        BlendMode.java
        SeparableBlendMode.java
        NonSeparableBlendMode.java
        state
        PDTextState.java
        PDGraphicsState.java
        PDSoftMask.java
        RenderingMode.java
        PDExtendedGraphicsState.java
        RenderingIntent.java
        package.html
        pattern
        PDTilingPattern.java
        PDAbstractPattern.java
        package.html
        PDShadingPattern.java
        PDXObject.java
        form
        PDTransparencyGroup.java
        PDFormXObject.java
        PDTransparencyGroupAttributes.java
        package.html
        shading
        PDShadingType2.java
        Line.java
        ShadedTriangle.java
        PDShading.java
        Type1ShadingPaint.java
        Type6ShadingPaint.java
        TriangleBasedShadingContext.java
        ShadingPaint.java
        PatchMeshesShadingContext.java
        TensorPatch.java
        Type7ShadingContext.java
        Type5ShadingContext.java
        Type7ShadingPaint.java
        PDShadingType5.java
        PDShadingType1.java
        IntPoint.java
        Vertex.java
        PDShadingType4.java
        AxialShadingContext.java
        AxialShadingPaint.java
        Type5ShadingPaint.java
        ShadingContext.java
        RadialShadingPaint.java
        PDShadingType7.java
        Type6ShadingContext.java
        Type1ShadingContext.java
        CoordinateColorPair.java
        PDShadingType3.java
        Patch.java
        RadialShadingContext.java
        GouraudShadingContext.java
        package.html
        Type4ShadingContext.java
        PDShadingType6.java
        PDTriangleBasedShadingType.java
        CoonsPatch.java
        Type4ShadingPaint.java
        CubicBezierCurve.java
        PDLineDashPattern.java
        PDPostScriptXObject.java
        PDFontSetting.java
        color
        PDTristimulus.java
        PDDeviceColorSpace.java
        PDDeviceNProcess.java
        PDLab.java
        PDSeparation.java
        PDDeviceGray.java
        PDICCBased.java
        PDDeviceN.java
        PDSpecialColorSpace.java
        PDColorSpace.java
        PDDeviceRGB.java
        PDCalGray.java
        PDIndexed.java
        PDJPXColorSpace.java
        PDOutputIntent.java
        PDPattern.java
        PDColor.java
        PDDeviceNAttributes.java
        PDCIEBasedColorSpace.java
        PDDeviceCMYK.java
        PDGamma.java
        PDCalRGB.java
        PDCIEDictionaryBasedColorSpace.java
        package.html
        image
        PDInlineImage.java
        CCITTFactory.java
        PDImage.java
        PDImageXObject.java
        JPEGFactory.java
        SampledImageReader.java
        LosslessFactory.java
        package.html
        package.html
        PDStructureElementNameTreeNode.java
        PDFormContentStream.java
        PDResources.java
        PageMode.java
        PDPage.java
        PDDocument.java
        interactive
        pagenavigation
        PDTransitionDirection.java
        PDTransitionDimension.java
        PDTransition.java
        PDTransitionStyle.java
        PDTransitionMotion.java
        PDThreadBead.java
        PDThread.java
        package.html
        measurement
        PDNumberFormatDictionary.java
        PDRectlinearMeasureDictionary.java
        PDViewportDictionary.java
        PDMeasureDictionary.java
        package.html
        form
        PDVariableText.java
        PDCheckBox.java
        PDSignatureField.java
        AppearanceStyle.java
        PDChoice.java
        PDComboBox.java
        PDButton.java
        PDAcroForm.java
        PlainTextFormatter.java
        PDTextField.java
        PDRadioButton.java
        PDFieldTree.java
        PDListBox.java
        PDDefaultAppearanceString.java
        PDTerminalField.java
        PDXFAResource.java
        PDNonTerminalField.java
        AppearanceGeneratorHelper.java
        FieldUtils.java
        PlainText.java
        package.html
        PDFieldFactory.java
        PDPushButton.java
        PDField.java
        viewerpreferences
        PDViewerPreferences.java
        package.html
        digitalsignature
        PDSeedValueCertificate.java
        PDPropBuild.java
        COSFilterInputStream.java
        SigningSupport.java
        PDSeedValueMDP.java
        PDSignature.java
        PDSeedValue.java
        ExternalSigningSupport.java
        SignatureInterface.java
        SignatureOptions.java
        PDSeedValueTimeStamp.java
        visible
        PDFTemplateStructure.java
        PDFTemplateCreator.java
        PDFTemplateBuilder.java
        PDVisibleSigBuilder.java
        PDVisibleSignDesigner.java
        PDVisibleSigProperties.java
        package.html
        package.html
        PDPropBuildDataDict.java
        action
        PDPageAdditionalActions.java
        PDActionLaunch.java
        PDActionJavaScript.java
        PDAdditionalActions.java
        PDActionEmbeddedGoTo.java
        PDActionResetForm.java
        PDFormFieldAdditionalActions.java
        PDActionMovie.java
        PDAction.java
        OpenMode.java
        PDActionImportData.java
        PDURIDictionary.java
        PDActionRemoteGoTo.java
        PDDocumentCatalogAdditionalActions.java
        PDWindowsLaunchParams.java
        PDActionSubmitForm.java
        PDActionFactory.java
        PDActionURI.java
        PDActionSound.java
        package.html
        PDActionGoTo.java
        PDAnnotationAdditionalActions.java
        PDActionNamed.java
        PDTargetDirectory.java
        PDActionThread.java
        PDActionHide.java
        annotation
        PDAnnotationLink.java
        PDAppearanceDictionary.java
        PDAnnotationPopup.java
        layout
        AppearanceStyle.java
        PlainTextFormatter.java
        PlainText.java
        PDAnnotationFileAttachment.java
        PDAnnotationMarkup.java
        PDAnnotationLine.java
        PDAppearanceStream.java
        PDAppearanceCharacteristicsDictionary.java
        PDAnnotationTextMarkup.java
        handlers
        PDSquigglyAppearanceHandler.java
        PDSoundAppearanceHandler.java
        PDUnderlineAppearanceHandler.java
        PDPolygonAppearanceHandler.java
        PDFreeTextAppearanceHandler.java
        PDAppearanceHandler.java
        PDSquareAppearanceHandler.java
        PDCircleAppearanceHandler.java
        PDCaretAppearanceHandler.java
        PDPolylineAppearanceHandler.java
        PDLineAppearanceHandler.java
        CloudyBorder.java
        AnnotationBorder.java
        PDTextAppearanceHandler.java
        PDInkAppearanceHandler.java
        PDLinkAppearanceHandler.java
        PDStrikeoutAppearanceHandler.java
        PDAbstractAppearanceHandler.java
        PDHighlightAppearanceHandler.java
        PDBorderEffectDictionary.java
        PDAnnotationWidget.java
        PDAppearanceEntry.java
        PDAnnotationSquareCircle.java
        PDAnnotationUnknown.java
        AnnotationFilter.java
        PDAnnotation.java
        PDBorderStyleDictionary.java
        PDExternalDataDictionary.java
        PDAnnotationRubberStamp.java
        package.html
        PDAnnotationText.java
        documentnavigation
        outline
        PDOutlineNode.java
        PDDocumentOutline.java
        PDOutlineItemIterator.java
        PDOutlineItem.java
        package.html
        destination
        PDPageFitDestination.java
        PDDestination.java
        PDPageXYZDestination.java
        PDPageFitHeightDestination.java
        PDPageDestination.java
        PDNamedDestination.java
        PDPageFitWidthDestination.java
        package.html
        PDPageFitRectangleDestination.java
        package.html
        MissingResourceException.java
        PDPatternContentStream.java
        ResourceCache.java
        documentinterchange
        prepress
        PDBoxStyle.java
        package.html
        logicalstructure
        PDStructureNode.java
        PDDefaultAttributeObject.java
        PDUserAttributeObject.java
        PDStructureTreeRoot.java
        PDMarkedContentReference.java
        PDObjectReference.java
        PDParentTreeValue.java
        PDAttributeObject.java
        PDStructureElement.java
        PDMarkInfo.java
        PDUserProperty.java
        Revisions.java
        package.html
        markedcontent
        PDPropertyList.java
        PDMarkedContent.java
        package.html
        taggedpdf
        StandardStructureTypes.java
        PDArtifactMarkedContent.java
        PDPrintFieldAttributeObject.java
        PDStandardAttributeObject.java
        PDListAttributeObject.java
        PDTableAttributeObject.java
        PDLayoutAttributeObject.java
        PDExportFormatAttributeObject.java
        PDFourColours.java
        package.html
        PDJavascriptNameTreeNode.java
        font
        PDSimpleFont.java
        Standard14Fonts.java
        FontCache.java
        PDType1FontEmbedder.java
        PDType1Font.java
        FontFormat.java
        PDType0Font.java
        PDFont.java
        FileSystemFontProvider.java
        CIDSystemInfo.java
        PDMMType1Font.java
        PDFontDescriptor.java
        CMapManager.java
        FontInfo.java
        PDFontFactory.java
        PDVectorFont.java
        PDType3Font.java
        PDTrueTypeFontEmbedder.java
        PDPanose.java
        FontMappers.java
        ToUnicodeWriter.java
        PDTrueTypeFont.java
        CIDFontMapping.java
        PDCIDFontType2.java
        Subsetter.java
        FontMapping.java
        PDPanoseClassification.java
        TrueTypeEmbedder.java
        PDType1CFont.java
        PDCIDSystemInfo.java
        PDType3CharProc.java
        PDCIDFont.java
        PDCIDFontType2Embedder.java
        PDCIDFontType0.java
        FontMapper.java
        FontMapperImpl.java
        package.html
        UniUtil.java
        encoding
        GlyphList.java
        Type1Encoding.java
        BuiltInEncoding.java
        Encoding.java
        DictionaryEncoding.java
        ZapfDingbatsEncoding.java
        MacRomanEncoding.java
        SymbolEncoding.java
        MacOSRomanEncoding.java
        MacExpertEncoding.java
        WinAnsiEncoding.java
        StandardEncoding.java
        PDFontLike.java
        FontProvider.java
        PDDocumentInformation.java
        package.html
        PDDocumentNameDictionary.java
        encryption
        SecurityHandlerFactory.java
        RC4Cipher.java
        PublicKeyDecryptionMaterial.java
        StandardDecryptionMaterial.java
        MessageDigests.java
        StandardProtectionPolicy.java
        DecryptionMaterial.java
        StandardSecurityHandler.java
        SecurityProvider.java
        SecurityHandler.java
        PublicKeyRecipient.java
        PDEncryption.java
        PDCryptFilterDictionary.java
        AccessPermission.java
        PublicKeyProtectionPolicy.java
        ProtectionPolicy.java
        InvalidPasswordException.java
        package.html
        SaslPrep.java
        filter
        MissingImageReaderException.java
        IdentityFilter.java
        FlateFilter.java
        DecodeResult.java
        Filter.java
        LZWFilter.java
        ASCIIHexFilter.java
        CryptFilter.java
        CCITTFaxFilter.java
        DCTFilter.java
        DecodeOptions.java
        JBIG2Filter.java
        ASCII85Filter.java
        RunLengthDecodeFilter.java
        FilterFactory.java
        JPXFilter.java
        Predictor.java
        TIFFExtension.java
        CCITTFaxEncoderStream.java
        CCITTFaxDecoderStream.java
        ASCII85OutputStream.java
        package.html
        ASCII85InputStream.java
        printing
        PDFPrintable.java
        Scaling.java
        PDFPageable.java
        Orientation.java
      - module-info.java
    - resources
      - org
        apache
        pdfbox
        resources
        icc
        ttf
        text
        BidiMirroring.txt
        glyphlist
        zapfdingbats.txt
        glyphlist.txt
        additional.txt
        version.properties
        afm
        Helvetica-BoldOblique.afm
        Times-Roman.afm
        Courier-BoldOblique.afm
        Helvetica.afm
        Times-Italic.afm
        Helvetica-Bold.afm
        MustRead.html
        Times-BoldItalic.afm
        Symbol.afm
        Courier.afm
        Helvetica-Oblique.afm
        Courier-Bold.afm
        ZapfDingbats.afm
        Courier-Oblique.afm
        Times-Bold.afm
    - org.apache.pdfbox.iml
    - README.md
  - org.apache.fontbox
    - src
      - org
        apache
        fontbox
        ttf
        TTFParser.java
        GlyphRenderer.java
        CmapLookup.java
        VerticalMetricsTable.java
        GlyfCompositeDescript.java
        WGL4Names.java
        CmapTable.java
        VerticalOriginTable.java
        TTFTable.java
        TTCDataStream.java
        OTLTable.java
        DigitalSignatureTable.java
        GlyphData.java
        OTFParser.java
        HeaderTable.java
        GlyfSimpleDescript.java
        KerningTable.java
        HorizontalMetricsTable.java
        GlyphSubstitutionTable.java
        MaximumProfileTable.java
        KerningSubtable.java
        HorizontalHeaderTable.java
        OpenTypeScript.java
        CFFTable.java
        OpenTypeFont.java
        NamingTable.java
        SubstitutingCmapLookup.java
        GlyfCompositeComp.java
        PostScriptTable.java
        OS2WindowsMetricsTable.java
        MemoryTTFDataStream.java
        GlyphDescription.java
        IndexToLocationTable.java
        TTFSubsetter.java
        RAFDataStream.java
        BufferedRandomAccessFile.java
        GlyphTable.java
        VerticalHeaderTable.java
        NameRecord.java
        TrueTypeCollection.java
        TTFDataStream.java
        CmapSubtable.java
        package.html
        GlyfDescript.java
        TrueTypeFont.java
        FontBoxFont.java
        cmap
        CMap.java
        CodespaceRange.java
        CMapParser.java
        CIDRange.java
        package.html
        pfb
        PfbParser.java
        package.html
        util
        Charsets.java
        BoundingBox.java
        autodetect
        NativeFontDirFinder.java
        WindowsFontDirFinder.java
        FontFileFinder.java
        MacFontDirFinder.java
        OS400FontDirFinder.java
        package.html
        FontDirFinder.java
        UnixFontDirFinder.java
        package.html
        EncodedFont.java
        type1
        Token.java
        Type1Lexer.java
        Type1Parser.java
        DamagedFontException.java
        Type1CharStringReader.java
        package.html
        Type1Font.java
        cff
        Type1CharString.java
        CFFExpertCharset.java
        CFFExpertEncoding.java
        Type2CharString.java
        CFFType1Font.java
        CFFExpertSubsetCharset.java
        Type2CharStringParser.java
        CFFOperator.java
        CFFFont.java
        CFFISOAdobeCharset.java
        CFFCharset.java
        CFFCIDFont.java
        FDSelect.java
        CIDKeyedType2CharString.java
        DataOutput.java
        Type1FontUtil.java
        CharStringCommand.java
        DataInput.java
        CFFEncoding.java
        CFFStandardString.java
        CFFDataInput.java
        CFFParser.java
        CFFStandardEncoding.java
        package.html
        CharStringHandler.java
        Type1CharStringParser.java
        encoding
        BuiltInEncoding.java
        Encoding.java
        MacRomanEncoding.java
        StandardEncoding.java
        package.html
        afm
        AFMParser.java
        FontMetrics.java
        Composite.java
        CompositePart.java
        CharMetric.java
        Ligature.java
        TrackKern.java
        package.html
        KernPair.java
      - module-info.java
    - resources
      - org
        apache
        fontbox
        cmap
        Adobe-Japan1-1
        90ms-RKSJ-H
        Adobe-Korea1-UCS2
        Adobe-Japan1-2
        Adobe-Japan1-4
        Ext-RKSJ-H
        KSCpc-EUC-V
        Adobe-GB1-1
        Adobe-CNS1-2
        Adobe-GB1-0
        Adobe-Japan1-5
        Adobe-Japan2-0
        UniCNS-UTF16-V
        ETenms-B5-H
        KSC-EUC-V
        GBKp-EUC-V
        HKscs-B5-V
        83pv-RKSJ-H
        Adobe-GB1-4
        UniKS-UTF16-H
        UniJIS-UCS2-V
        B5pc-V
        Add-RKSJ-H
        UniJIS-UTF16-V
        Adobe-CNS1-1
        UniGB-UTF16-H
        Identity-V
        GB-EUC-V
        Adobe-CNS1-0
        KSCpc-EUC-H
        GBK-EUC-H
        EUC-H
        Ext-RKSJ-V
        V
        GB-EUC-H
        GBpc-EUC-H
        Adobe-GB1-5
        Adobe-Korea1-1
        UniGB-UCS2-V
        ETenms-B5-V
        KSCms-UHC-HW-V
        Adobe-CNS1-4
        Adobe-GB1-2
        Adobe-GB1-3
        UniKS-UCS2-V
        GBK2K-H
        90pv-RKSJ-H
        B5pc-H
        ETen-B5-H
        CNS-EUC-H
        GBK-EUC-V
        HKscs-B5-H
        90pv-RKSJ-V
        KSCms-UHC-HW-H
        90msp-RKSJ-V
        UniCNS-UCS2-V
        Adobe-Japan1-6
        GBpc-EUC-V
        Identity-H
        Adobe-CNS1-6
        Adobe-Japan1-0
        UniJIS-UCS2-HW-H
        ETen-B5-V
        Add-RKSJ-V
        GBKp-EUC-H
        90msp-RKSJ-H
        KSC-EUC-H
        Adobe-Japan1-3
        Adobe-CNS1-5
        UniJIS-UCS2-HW-V
        KSCms-UHC-V
        90ms-RKSJ-V
        Adobe-CNS1-3
        CNS-EUC-V
        UniKS-UCS2-H
        UniKS-UTF16-V
        EUC-V
        H
        UniJIS-UTF16-H
        Adobe-Korea1-0
        Adobe-Korea1-2
        UniJIS-UCS2-H
        GBK2K-V
        UniGB-UTF16-V
        KSCms-UHC-H
        unicode
        Scripts.txt
    - org.apache.fontbox.iml
    - README.md
  - org.apache.commons.logging
    - src
      - org
        apache
        commons
        logging
        Log.java
        LogFactory.java
        LogConfigurationException.java
        NoOpLog.java
      - module-info.java
    - org.apache.commons.logging.iml
    - README.md
- bundle.bat
- artifacts
  - icns
  - artwork_prep
    - misc
    - types
  - png
  - ico
    - gcs_doc.ico
    - eqp_doc.ico
    - not_doc.ico
    - skl_doc.ico
    - adm_doc.ico
    - app.ico
    - gct_doc.ico
    - spl_doc.ico
    - eqm_doc.ico
    - adq_doc.ico
  - file_associations
    - windows
      - adq_ext.properties
      - skl_ext.properties
      - eqp_ext.properties
      - gct_ext.properties
      - not_ext.properties
      - adm_ext.properties
      - gcs_ext.properties
      - eqm_ext.properties
      - spl_ext.properties
    - linux
      - adq_ext.properties
      - skl_ext.properties
      - eqp_ext.properties
      - gct_ext.properties
      - not_ext.properties
      - adm_ext.properties
      - gcs_ext.properties
      - eqm_ext.properties
      - spl_ext.properties
    - macos
      - adq_ext.properties
      - skl_ext.properties
      - eqp_ext.properties
      - gct_ext.properties
      - not_ext.properties
      - adm_ext.properties
      - gcs_ext.properties
      - eqm_ext.properties
      - spl_ext.properties
- LICENSE
- .gitattributes
- bundle.sh
- com.trollworks.gcs
  - src
    - com
      - trollworks
        gcs
        library
        LibraryExplorerRow.java
        LibraryFields.java
        LibraryLocationsPanel.java
        LibraryExplorerRowRenderer.java
        LibraryDirectoryRow.java
        LibraryFileRow.java
        LibraryContent.java
        LibraryUpdater.java
        LibraryCollector.java
        LibraryExplorerDockable.java
        LibraryWatcher.java
        Library.java
        LibraryDockable.java
        LibraryHeader.java
        page
        PagePoints.java
        DropPanel.java
        PageLabel.java
        PageField.java
        PageOwner.java
        PageHeader.java
        Page.java
        equipment
        EquipmentDockable.java
        EquipmentList.java
        Equipment.java
        EquipmentEditor.java
        EquipmentOutline.java
        CheckCell.java
        EquipmentColumn.java
        weapon
        RangedWeaponStats.java
        WeaponStats.java
        WeaponDescriptionCell.java
        MeleeWeaponEditor.java
        WeaponDisplayRow.java
        WeaponOutline.java
        RangedWeaponEditor.java
        WeaponDamage.java
        WeaponColumn.java
        WeaponSTDamage.java
        WeaponEditor.java
        MeleeWeaponStats.java
        criteria
        NumericCriteria.java
        StringCompareType.java
        DoubleCriteria.java
        WeightCriteria.java
        IntegerCriteria.java
        NumericCompareType.java
        StringCriteria.java
        menu
        library
        LibraryUpdateCommand.java
        LibraryMenu.java
        ShowLibraryFolderCommand.java
        ChangeLibraryLocationsCommand.java
        help
        HelpMenuProvider.java
        UpdateAppCommand.java
        AboutCommand.java
        OpenURICommand.java
        DynamicMenuItem.java
        RetargetableFocus.java
        MenuHelpers.java
        DynamicJMenuItemPropertyChangeListener.java
        item
        ItemMenuProvider.java
        NewEquipmentCommand.java
        ApplyTemplateCommand.java
        NewSpellCommand.java
        NewSkillCommand.java
        OpenEditorCommand.java
        NewEquipmentModifierCommand.java
        CopyToTemplateCommand.java
        NewNoteCommand.java
        OpenPageReferenceCommand.java
        HasSourceReference.java
        NewAdvantageCommand.java
        NewAdvantageModifierCommand.java
        AddNaturalAttacksAdvantageCommand.java
        CopyToSheetCommand.java
        file
        OpenCommand.java
        ExportMenu.java
        NewAdvantageModifiersLibraryCommand.java
        SaveCommand.java
        ExportToPNGCommand.java
        ExportToGURPSCalculatorCommand.java
        ClearRecentFilesMenuCommand.java
        SignificantFrame.java
        NewEquipmentModifiersLibraryCommand.java
        NewNoteLibraryCommand.java
        ExportToTextTemplateCommand.java
        Saveable.java
        NewEquipmentLibraryCommand.java
        QuitCommand.java
        NewCharacterTemplateCommand.java
        CloseCommand.java
        CloseHandler.java
        NewCharacterSheetCommand.java
        SaveAsCommand.java
        PrintCommand.java
        NewSpellsLibraryCommand.java
        NewAdvantagesLibraryCommand.java
        ExportToPDFCommand.java
        FileMenuProvider.java
        RecentFilesMenu.java
        PageSetupCommand.java
        NewSkillsLibraryCommand.java
        OpenDataFileCommand.java
        Command.java
        edit
        Cutable.java
        IncrementCommand.java
        UndoCommand.java
        Undoable.java
        CutCommand.java
        Incrementable.java
        DecrementUsesCommand.java
        SkillLevelIncrementCommand.java
        Copyable.java
        Deletable.java
        EditMenuProvider.java
        TechLevelIncrementCommand.java
        SwapDefaultsCommand.java
        Openable.java
        ConvertToContainer.java
        DecrementCommand.java
        TechLevelIncrementable.java
        OpenItemCommand.java
        SelectAllCapable.java
        PreferencesCommand.java
        PasteCommand.java
        SkillLevelDecrementCommand.java
        TechLevelDecrementCommand.java
        RandomizeNameCommand.java
        MoveEquipmentCommand.java
        IncrementUsesCommand.java
        Pastable.java
        RedoCommand.java
        DuplicateCommand.java
        DeleteCommand.java
        JumpToSearchCommand.java
        UsesIncrementable.java
        CopyCommand.java
        SelectAllCommand.java
        SkillLevelIncrementable.java
        JumpToSearchTarget.java
        ToggleStateCommand.java
        RandomizeDescriptionCommand.java
        StdMenuBar.java
        DynamicMenuEnabler.java
        DynamicCheckBoxMenuItem.java
        modifier
        EquipmentModifierWeightType.java
        AdvantageModifierCostType.java
        AdvantageModifiersOutline.java
        AdvantageModifier.java
        EquipmentModifierCostType.java
        EquipmentModifierColumnID.java
        Fraction.java
        ModifierListEditor.java
        AdvantageModifierEditor.java
        AdvantageModifierColumnID.java
        AdvantageModifierListEditor.java
        ModifierWeightValueType.java
        EquipmentModifiersDockable.java
        EquipmentModifierEnabler.java
        Affects.java
        ModifierCheckCell.java
        EquipmentModifierList.java
        EquipmentModifiersOutline.java
        ModifierCostValueType.java
        AdvantageModifiersDockable.java
        EquipmentModifier.java
        AdvantageModifierList.java
        Modifier.java
        AdvantageModifierEnabler.java
        EquipmentModifierListEditor.java
        EquipmentModifierEditor.java
        ui
        MarkdownDocument.java
        print
        Quality.java
        PrintManager.java
        ObjectWrapper.java
        DummyPrintService.java
        PrintPanel.java
        PageSides.java
        PageOrientation.java
        PageSetupPanel.java
        InkChromaticity.java
        PrintUtilities.java
        scale
        ScaleRoot.java
        Scale.java
        Scales.java
        TextDrawing.java
        layout
        PrecisionLayout.java
        FlexColumn.java
        PrecisionLayoutAlignment.java
        FlexGrid.java
        RowDistribution.java
        Alignment.java
        FlexContainer.java
        FlexRow.java
        ColumnLayout.java
        FlexSpacer.java
        PrecisionLayoutData.java
        FlexGridData.java
        FlexCell.java
        FlexLayout.java
        LayoutSize.java
        FlexComponent.java
        Fonts.java
        SelectionOwner.java
        WindowSizeEnforcer.java
        GraphicsUtilities.java
        Colors.java
        UIUtilities.java
        widget
        DirectScrollPanel.java
        Label.java
        KeyStrokeDisplay.java
        ActionPanel.java
        outline
        HeaderCell.java
        ListTextCell.java
        RowSelection.java
        OutlineModelListener.java
        RowUndo.java
        TextCell.java
        ListRow.java
        RowFilter.java
        OutlineModel.java
        ColumnConfig.java
        Cell.java
        Row.java
        RowEditor.java
        Switchable.java
        MultipleRowUndo.java
        RowItemRenderer.java
        OutlineHeader.java
        MultiCell.java
        ColumnUtils.java
        Outline.java
        IconsCell.java
        ListHeaderCell.java
        OutlineSyncer.java
        Column.java
        RowSorter.java
        ListOutline.java
        RowUndoSnapshot.java
        RowIterator.java
        OutlineProxy.java
        WrappedCell.java
        RowPostProcessor.java
        search
        SearchTarget.java
        Search.java
        SearchDropDown.java
        EditorPanel.java
        SizeAwareBasicOptionPaneUI.java
        Enabled.java
        WindowUtils.java
        BaseWindow.java
        FontPanel.java
        LinkedLabel.java
        Toolbar.java
        WiderToolTipUI.java
        Wrapper.java
        PopupButton.java
        Workspace.java
        EditorField.java
        DataModifiedListener.java
        tree
        TreeDragState.java
        TreeRoot.java
        TreeRowIndexComparator.java
        TreeRowDragState.java
        TreeNotificationKeys.java
        IconInteractor.java
        TextTreeColumn.java
        TreeColumn.java
        TreeColumnDragState.java
        TreeContainerRow.java
        IconTreeColumn.java
        TreeRowIterator.java
        TreePanel.java
        TreeSorter.java
        TreeRow.java
        FieldAccessor.java
        IconAccessor.java
        TreeRowViewIterator.java
        TreeRowSelection.java
        StdTreeDeleter.java
        Icons.java
        ColorWell.java
        BandedPanel.java
        Commitable.java
        dock
        Dockable.java
        DockHeader.java
        DockContainerProcessor.java
        ShowTabsButton.java
        Dock.java
        DockColors.java
        DockContainer.java
        DockableTransferable.java
        DockLocation.java
        DockLayout.java
        DockableFactory.java
        DockTab.java
        DockLayoutNode.java
        DirectScrollPanelArea.java
        StdFileDialog.java
        IconButton.java
        RetinaIcon.java
        AboutPanel.java
        MouseCapture.java
        border
        TitledBorder.java
        Edge.java
        LineBorder.java
        EmptyBorder.java
        image
        Images.java
        Img.java
        Cursors.java
        Selection.java
        utility
        RecursiveDirectoryRemover.java
        xml
        XMLReader.java
        XMLWriter.java
        XMLNodeType.java
        Timing.java
        UrlUtils.java
        task
        Task.java
        Tasks.java
        UITask.java
        launchproxy
        Server.java
        Client.java
        State.java
        ConduitMessage.java
        LaunchProxy.java
        UpdateChecker.java
        Fixed6.java
        undo
        StdUndoManager.java
        MultipleUndo.java
        Platform.java
        FileProxy.java
        Debug.java
        Log.java
        units
        WeightValue.java
        LengthValue.java
        UnitsValue.java
        WeightUnits.java
        Units.java
        LengthUnits.java
        ReverseListIterator.java
        Geometry.java
        PrintProxy.java
        Dice.java
        DummyWriter.java
        Version.java
        VersionException.java
        I18n.java
        FileType.java
        FilteredList.java
        FilteredIterator.java
        text
        Conversion.java
        DoubleFormatter.java
        IntegerFormatter.java
        DateTimeFormatter.java
        DiceFormatter.java
        Enums.java
        WeightFormatter.java
        Text.java
        NumericComparator.java
        Numbers.java
        HeightFormatter.java
        NumberFilter.java
        notification
        NotifierTarget.java
        BatchNotifierTarget.java
        Notifier.java
        json
        JsonWriter.java
        Json.java
        JsonArray.java
        JsonMap.java
        JsonNull.java
        JsonCollection.java
        Release.java
        SafeFileUpdater.java
        PathUtils.java
        FileScanner.java
        SelectionModel.java
        NewerDataFileVersionException.java
        character
        names
        USCensusNames.java
        Names.java
        Namer.java
        DisplayOption.java
        Profile.java
        DescriptionRandomizer.java
        DoubleOutlinePanel.java
        ReactionsOutline.java
        IdentityPanel.java
        EncumbrancePanel.java
        ReactionColumn.java
        WeaponOutline.java
        PageAssembler.java
        Armor.java
        CharacterSheet.java
        CmdLineExport.java
        OutlineInfo.java
        SettingsEditor.java
        CharacterFieldUndo.java
        CharacterSheetLayout.java
        LiftPanel.java
        HitPointsPanel.java
        Settings.java
        MiscPanel.java
        SingleOutlinePanel.java
        SheetDockable.java
        HitLocationTable.java
        PointsPanel.java
        HashedWeapon.java
        FatiguePointsPanel.java
        HitLocation.java
        TextTemplate.java
        PortraitPanel.java
        ReactionRow.java
        HitLocationPanel.java
        Encumbrance.java
        GURPSCharacter.java
        AttributesPanel.java
        HitLocationTableEntry.java
        DescriptionPanel.java
        preferences
        PreferencesWindow.java
        DisplayPreferences.java
        MenuKeyPreferences.java
        Preferences.java
        FontPreferences.java
        ReferenceLookupPreferences.java
        OutputPreferences.java
        PreferencePanel.java
        PortraitPreferencePanel.java
        SheetPreferences.java
        advantage
        SelfControlRoll.java
        AdvantageEditor.java
        AdvantageContainerType.java
        SelfControlRollAdjustments.java
        Levels.java
        AdvantageOutline.java
        AdvantagesDockable.java
        Advantage.java
        AdvantageColumn.java
        AdvantageList.java
        prereq
        ContainedWeightPrereq.java
        ContainedQuantityPrereq.java
        Prereq.java
        AdvantagePrereq.java
        NameLevelPrereq.java
        HasPrereq.java
        PrereqList.java
        SpellPrereq.java
        PrereqsPanel.java
        ContainedWeightPrereqEditor.java
        AndOrLabel.java
        SkillPrereq.java
        ContainedQuantityPrereqEditor.java
        AttributePrereqEditor.java
        AttributePrereq.java
        SkillPrereqEditor.java
        ListPrereqEditor.java
        SpellPrereqEditor.java
        PrereqEditor.java
        AdvantagePrereqEditor.java
        spell
        SpellColumn.java
        RitualMagicSpellEditor.java
        RitualMagicSpell.java
        Spell.java
        SpellsDockable.java
        SpellList.java
        SpellEditor.java
        BaseSpellEditor.java
        SpellOutline.java
        notes
        NoteList.java
        NoteOutline.java
        Note.java
        NoteColumn.java
        NotesDockable.java
        NoteEditor.java
        skill
        SkillEditor.java
        Technique.java
        SkillAttribute.java
        SkillDefaultEditor.java
        SkillLevel.java
        SkillPointsTextCell.java
        SkillDefault.java
        SkillsDockable.java
        Skill.java
        SkillDifficulty.java
        SkillDefaultType.java
        OrLabel.java
        Defaults.java
        TechniqueEditor.java
        SkillColumn.java
        SkillOutline.java
        SkillList.java
        datafile
        DataFileDockable.java
        ListFile.java
        DataFile.java
        LoadState.java
        template
        Template.java
        TemplateDockable.java
        TemplateSheet.java
        TemplateOutlinePanel.java
        feature
        SkillBonusEditor.java
        WeaponSelectionType.java
        WeightReductionFormatter.java
        ContainedWeightReductionEditor.java
        FeaturesPanel.java
        SkillSelectionType.java
        SpellBonus.java
        AttributeBonusLimitation.java
        DRBonusEditor.java
        CostReduction.java
        WeaponBonus.java
        SkillBonus.java
        CostReductionEditor.java
        LeveledAmount.java
        AttributeBonus.java
        ReactionBonusEditor.java
        ReactionBonus.java
        FeatureType.java
        SpellBonusEditor.java
        ContainedWeightReduction.java
        AttributeBonusEditor.java
        BonusAttributeType.java
        WeaponBonusEditor.java
        HitLocation.java
        Feature.java
        FeatureEditor.java
        DRBonus.java
        NoFeature.java
        Bonus.java
        pdfview
        PdfDockable.java
        PdfRef.java
        PdfRenderer.java
        PdfPanel.java
        GCS.java
    - module-info.java
  - resources
    - names
      - USCensus1990MaleFirstNames.txt
      - USCensus1990FemaleFirstNames.txt
    - images
  - com.trollworks.gcs.iml
- README.md
- .idea
  - codeStyles
    - codeStyleConfig.xml
    - Project.xml
  - inspectionProfiles
    - profiles_settings.xml
    - GCS.xml
  - modules.xml
  - dictionaries
  - encodings.xml
  - misc.xml
  - scopes
    - gcs.xml
  - gcs.iml
  - $CACHE_FILE$
  - vcs.xml
  - copyright
    - profiles_settings.xml
    - GCS.xml
  - .gitignore
- .gitignore

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.text;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.StringWriter;
import java.io.Writer;
import java.text.Bidi;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.pdfbox.pdmodel.interactive.pagenavigation.PDThreadBead;
import org.apache.pdfbox.util.QuickSort;

/**
 * This class will take a PDF document and strip out all of the text, ignoring the formatting and such. Please note: it
 * is up to clients of this class to verify that a specific user has the correct permissions to extract text from the
 * PDF document.
 * 
 * The basic flow of this process is that we get a document and use a series of processXXX() functions that work on
 * smaller and smaller chunks of the page. Eventually, we fully process each page and then print it.
 *
 * @author Ben Litchfield
 */
public class PDFTextStripper extends LegacyPDFStreamEngine
{
    // Class-wide defaults copied into each instance's indentThreshold / dropThreshold fields.
    // The static initializer below may replace them with values taken from system properties.
    private static float defaultIndentThreshold = 2.0f;
    private static float defaultDropThreshold = 2.5f;
    // Assigned exactly once by the second static initializer, based on the Java specification version.
    private static final boolean useCustomQuickSort;

    // Logger for this class.
    private static final Log LOG = LogFactory.getLog(PDFTextStripper.class);

    // Allow the default indent/drop thresholds to be overridden with -D system properties:
    // pdftextstripper.indent
    // pdftextstripper.drop
    // Missing, empty or unparsable values leave the compiled-in defaults untouched.
    static
    {
        String strDrop = null;
        String strIndent = null;
        try
        {
            // Locale.ROOT keeps the property prefix stable. With the default locale a Turkish
            // environment lower-cases 'I' to a dotless i, so the documented property names
            // would never be matched.
            String className = PDFTextStripper.class.getSimpleName().toLowerCase(Locale.ROOT);
            strIndent = System.getProperty(className + ".indent");
            strDrop = System.getProperty(className + ".drop");
        }
        catch (SecurityException e)
        {
            // PDFBOX-1946 when run in an applet
            // ignore and use default
        }
        if (strIndent != null && strIndent.length() > 0)
        {
            try
            {
                defaultIndentThreshold = Float.parseFloat(strIndent);
            }
            catch (NumberFormatException nfe)
            {
                // not a number: ignore and use default
            }
        }
        if (strDrop != null && strDrop.length() > 0)
        {
            try
            {
                defaultDropThreshold = Float.parseFloat(strDrop);
            }
            catch (NumberFormatException nfe)
            {
                // not a number: ignore and use default
            }
        }
    }
    
    static
    {
        // Decide whether the custom quicksort is needed as a workaround for the
        // PDFBOX-1512 transitivity issue of TextPositionComparator. It is used
        // on every runtime except Java 1.6 and older.
        boolean legacyRuntime = false;
        try
        {
            String specVersion = System.getProperty("java.specification.version");
            StringTokenizer parts = new StringTokenizer(specVersion, ".");
            int major = Integer.parseInt(parts.nextToken());
            // a version without a second component (e.g. "11") counts as minor 0
            int minor = parts.hasMoreTokens() ? Integer.parseInt(parts.nextToken()) : 0;
            legacyRuntime = major == 1 && minor <= 6;
        }
        catch (SecurityException x)
        {
            // property not readable (e.g. in an applet):
            // assume 1.7 or higher so that quicksort is used
        }
        catch (NumberFormatException nfe)
        {
            // should never happen, but if it does,
            // assume 1.7 or higher so that quicksort is used
        }
        useCustomQuickSort = !legacyRuntime;
    }

    /**
     * The platform's line separator, read from the "line.separator" system property. Note that this is an
     * instance field (not static), so it is evaluated for every new instance.
     */
    protected final String LINE_SEPARATOR = System.getProperty("line.separator");

    // Delimiter strings with their defaults. writeText() overwrites paragraphEnd, pageStart,
    // articleStart and articleEnd with lineSeparator when getAddMoreFormatting() returns true.
    private String lineSeparator = LINE_SEPARATOR;
    private String wordSeparator = " ";
    private String paragraphStart = "";
    private String paragraphEnd = "";
    private String pageStart = "";
    private String pageEnd = LINE_SEPARATOR;
    private String articleStart = "";
    private String articleEnd = "";

    // Number of the page currently being visited; reset to 0 by resetEngine() and incremented by
    // processPages() before each page, so the first page is 1.
    private int currentPageNo = 0;
    // Inclusive page range; processPage() only handles pages with startPage <= currentPageNo <= endPage.
    private int startPage = 1;
    private int endPage = Integer.MAX_VALUE;
    // Optional bookmark whose destination page is resolved in processPages().
    private PDOutlineItem startBookmark = null;

    // 1-based bookmark pages, recomputed by processPages(); -1 means undefined
    private int startBookmarkPageNumber = -1;
    private int endBookmarkPageNumber = -1;

    // Optional bookmark whose destination page is resolved in processPages().
    private PDOutlineItem endBookmark = null;
    private boolean suppressDuplicateOverlappingText = true;
    // When true, processPage() calls fillBeadRectangles() and reserves article sections per bead.
    private boolean shouldSeparateByBeads = true;
    private boolean sortByPosition = false;
    private boolean addMoreFormatting = false;

    // Per-instance thresholds, initialised from the class-wide defaults.
    private float indentThreshold = defaultIndentThreshold;
    private float dropThreshold = defaultDropThreshold;

    // we will need to estimate where to add spaces, these are used to help guess
    private float spacingTolerance = .5f;
    private float averageCharTolerance = .3f;

    // NOTE(review): presumably populated by fillBeadRectangles(page), which is not visible here;
    // processPage() reads its size right after that call.
    private List<PDRectangle> beadRectangles = null;

    /**
     * The charactersByArticle is used to extract text by article divisions. For example a PDF that has two columns like
     * a newspaper, we want to extract the first column and then the second column. In this example the PDF would have 2
     * beads(or articles), one for each column. The size of the charactersByArticle would be 5, because not all text on
     * the screen will fall into one of the articles. The five divisions are shown below
     *
     * Text before first article
     * first article text
     * text between first article and second article
     * second article text
     * text after second article
     *
     * Most PDFs won't have any beads, so charactersByArticle will contain a single entry.
     *
     * The list is cleared by resetEngine() at the start of every writeText() call.
     */
    protected ArrayList<List<TextPosition>> charactersByArticle = new ArrayList<List<TextPosition>>();

    // Cleared by resetEngine() at the start of every writeText() call.
    private Map<String, TreeMap<Float, TreeSet<Float>>> characterListMapping = new HashMap<String, TreeMap<Float, TreeSet<Float>>>();

    // Document and writer of the current extraction; both are assigned by writeText().
    protected PDDocument document;
    protected Writer output;

    /**
     * True if we started a paragraph but haven't ended it yet.
     */
    private boolean inParagraph;

    /**
     * Instantiate a new PDFTextStripper object. The constructor body is empty, so every field keeps the default
     * value from its declaration.
     *
     * @throws IOException declared in the signature; nothing in this constructor body throws it. NOTE(review): it
     * may originate from the superclass constructor, which is not visible here - confirm.
     */
    public PDFTextStripper() throws IOException
    {
    }

    /**
     * Extracts the text of a document into a string. This is a convenience wrapper that runs
     * {@link #writeText(PDDocument, Writer)} against an in-memory writer. <br>
     * NOTE: The document must not be encrypted when coming into this method.
     *
     * @param doc The document to get the text from.
     * @return The text of the PDF document.
     * @throws IOException if the doc state is invalid or it is encrypted.
     */
    public String getText(PDDocument doc) throws IOException
    {
        // collect everything in memory, then hand back the accumulated text
        StringWriter buffer = new StringWriter();
        writeText(doc, buffer);
        return buffer.toString();
    }

    /**
     * Puts the stripper back into its initial per-document state: empties the character collections (when
     * present), forgets the current document and rewinds the page counter to 0.
     */
    private void resetEngine()
    {
        if (characterListMapping != null)
        {
            characterListMapping.clear();
        }
        if (charactersByArticle != null)
        {
            charactersByArticle.clear();
        }
        document = null;
        currentPageNo = 0;
    }

    /**
     * Extracts the text of a PDDocument and writes it to the given writer. The engine state is reset first, so
     * one instance can be reused for several documents. When {@link #getAddMoreFormatting()} returns true, the
     * paragraph end, page start, article start and article end strings are replaced with the line separator.
     *
     * @param doc The document to get the data from.
     * @param outputStream The location to put the text.
     *
     * @throws IOException If the doc is in an invalid state.
     */
    public void writeText(PDDocument doc, Writer outputStream) throws IOException
    {
        resetEngine();
        output = outputStream;
        document = doc;

        boolean extraFormatting = getAddMoreFormatting();
        if (extraFormatting)
        {
            articleEnd = lineSeparator;
            articleStart = lineSeparator;
            pageStart = lineSeparator;
            paragraphEnd = lineSeparator;
        }

        // document-level hooks bracket the page processing
        startDocument(document);
        processPages(document.getPages());
        endDocument(document);
    }

    /**
     * Resolves the start and end bookmarks to 1-based page numbers and then walks all pages of the tree,
     * handing every page that has contents to {@link #processPage(PDPage)}.
     *
     * @param pages The pages object in the document.
     *
     * @throws IOException If there is an error parsing the text.
     */
    protected void processPages(PDPageTree pages) throws IOException
    {
        // resolve the start bookmark; -1 = undefined
        PDPage firstTarget = null;
        if (startBookmark != null)
        {
            firstTarget = startBookmark.findDestinationPage(document);
        }
        startBookmarkPageNumber = firstTarget == null ? -1 : pages.indexOf(firstTarget) + 1;

        // resolve the end bookmark; -1 = undefined
        PDPage lastTarget = null;
        if (endBookmark != null)
        {
            lastTarget = endBookmark.findDestinationPage(document);
        }
        endBookmarkPageNumber = lastTarget == null ? -1 : pages.indexOf(lastTarget) + 1;

        boolean startUnresolved = startBookmark != null && startBookmarkPageNumber == -1;
        boolean endUnresolved = endBookmark != null && endBookmarkPageNumber == -1;
        if (startUnresolved && endUnresolved
                && startBookmark.getCOSObject() == endBookmark.getCOSObject())
        {
            // special case: start and end bookmark are the same but point to
            // nothing, so no text at all is extracted.
            startBookmarkPageNumber = 0;
            endBookmarkPageNumber = 0;
        }

        Iterator<PDPage> pageIterator = pages.iterator();
        while (pageIterator.hasNext())
        {
            PDPage current = pageIterator.next();
            // pages without contents still count towards the page number
            currentPageNo++;
            if (current.hasContents())
            {
                processPage(current);
            }
        }
    }

    /**
     * Hook for subclasses that is invoked once per document, before any page is processed. The default
     * implementation is empty.
     *
     * @param document The PDF document that is being processed.
     * @throws IOException If an IO error occurs.
     */
    protected void startDocument(PDDocument document) throws IOException
    {
        // no default implementation, but available for subclasses
    }

    /**
     * Hook for subclasses that is invoked once per document, after all pages have been processed. The default
     * implementation is empty.
     *
     * @param document The PDF document that is being processed.
     * @throws IOException If an IO error occurs.
     */
    protected void endDocument(PDDocument document) throws IOException
    {
        // no default implementation, but available for subclasses
    }

    /**
     * This will process the contents of a page.
     * <p>
     * The page is only processed when the current page number lies inside the configured start/end page range and,
     * for each bookmark page number that is defined (not -1), inside the bookmark range as well. For such a page
     * the per-article character lists are prepared, the page content is processed by the superclass, and the
     * collected text is written with {@link #writePage()}, framed by {@link #startPage(PDPage)} and
     * {@link #endPage(PDPage)}.
     *
     * @param page The page to process.
     *
     * @throws IOException If there is an error processing the page.
     */
    @Override
    public void processPage(PDPage page) throws IOException
    {
        if (currentPageNo >= startPage && currentPageNo <= endPage
                && (startBookmarkPageNumber == -1 || currentPageNo >= startBookmarkPageNumber)
                && (endBookmarkPageNumber == -1 || currentPageNo <= endBookmarkPageNumber))
        {
            startPage(page);

            // one section for text outside of any bead, plus two sections per bead
            int numberOfArticleSections = 1;
            if (shouldSeparateByBeads)
            {
                fillBeadRectangles(page);
                numberOfArticleSections += beadRectangles.size() * 2;
            }

            // Drop the lists left over from a previous page that had more sections.
            // Otherwise writePage() would emit start/end markers for articles that do
            // not exist on this page, and the "last article" fallback used by
            // processTextPosition() would point at a stale list.
            for (int i = charactersByArticle.size() - 1; i >= numberOfArticleSections; i--)
            {
                charactersByArticle.remove(i);
            }

            // recycle the lists that are kept
            for (List<TextPosition> articleCharacters : charactersByArticle)
            {
                articleCharacters.clear();
            }

            // add the lists that are still missing
            charactersByArticle.ensureCapacity(numberOfArticleSections);
            while (charactersByArticle.size() < numberOfArticleSections)
            {
                charactersByArticle.add(new ArrayList<TextPosition>());
            }

            characterListMapping.clear();
            super.processPage(page);
            writePage();
            endPage(page);
        }
    }

    /**
     * Rebuilds <code>beadRectangles</code> for the given page: one entry per thread bead of the page, in the order
     * returned by the page. Each rectangle is flipped vertically against the media box and shifted by the crop box
     * origin. A bead that is null, or that has no rectangle, is stored as a null entry so that the list indices
     * still line up with the beads.
     *
     * @param page The page whose thread beads are read.
     */
    private void fillBeadRectangles(PDPage page)
    {
        beadRectangles = new ArrayList<PDRectangle>();
        for (PDThreadBead bead : page.getThreadBeads())
        {
            // a bead may exist without a rectangle; treat it like a missing bead
            // instead of failing with a NullPointerException below
            PDRectangle rect = bead == null ? null : bead.getRectangle();
            if (rect == null)
            {
                // can't skip, because of null entry handling in processTextPosition()
                beadRectangles.add(null);
                continue;
            }

            // bead rectangle is in PDF coordinates (y=0 is bottom),
            // glyphs are in image coordinates (y=0 is top),
            // so we must flip
            PDRectangle mediaBox = page.getMediaBox();
            float upperRightY = mediaBox.getUpperRightY() - rect.getLowerLeftY();
            float lowerLeftY = mediaBox.getUpperRightY() - rect.getUpperRightY();
            rect.setLowerLeftY(lowerLeftY);
            rect.setUpperRightY(upperRightY);

            // adjust for cropbox
            PDRectangle cropBox = page.getCropBox();
            if (cropBox.getLowerLeftX() != 0 || cropBox.getLowerLeftY() != 0)
            {
                rect.setLowerLeftX(rect.getLowerLeftX() - cropBox.getLowerLeftX());
                rect.setLowerLeftY(rect.getLowerLeftY() - cropBox.getLowerLeftY());
                rect.setUpperRightX(rect.getUpperRightX() - cropBox.getLowerLeftX());
                rect.setUpperRightY(rect.getUpperRightY() - cropBox.getLowerLeftY());
            }

            beadRectangles.add(rect);
        }
    }

    /**
     * Start a new article, which is typically defined as a column on a single page (also referred to as a bead). This
     * assumes that the primary direction of text is left to right: the default implementation simply delegates to
     * {@link #startArticle(boolean)} with <code>true</code>. Subclasses may provide additional information.
     *
     * @throws IOException If there is any error writing to the stream.
     */
    protected void startArticle() throws IOException
    {
        final boolean leftToRight = true;
        this.startArticle(leftToRight);
    }

    /**
     * Start a new article, which is typically defined as a column on a single page (also referred to as a bead).
     * The default implementation writes the article start marker to the output and does not look at
     * <code>isLTR</code>. Subclasses may provide additional information.
     *
     * @param isLTR true if primary direction of text is left to right.
     * @throws IOException If there is any error writing to the stream.
     */
    protected void startArticle(boolean isLTR) throws IOException
    {
        final String marker = getArticleStart();
        output.write(marker);
    }

    /**
     * End an article. The default implementation writes the article end marker to the output. Subclasses may
     * provide additional information.
     *
     * @throws IOException If there is any error writing to the stream.
     */
    protected void endArticle() throws IOException
    {
        final String marker = getArticleEnd();
        output.write(marker);
    }

    /**
     * Start a new page. This is called by {@link #processPage(PDPage)} for every page that is selected for
     * extraction, before the page content is processed. Default implementation is to do nothing. Subclasses may
     * provide additional information.
     *
     * @param page The page we are about to process.
     *
     * @throws IOException If there is any error writing to the stream.
     */
    protected void startPage(PDPage page) throws IOException
    {
        // default is to do nothing
    }

    /**
     * End a page. This is called by {@link #processPage(PDPage)} for every page that is selected for extraction,
     * after the text of the page has been written. Default implementation is to do nothing. Subclasses may provide
     * additional information.
     *
     * @param page The page that has just been processed.
     *
     * @throws IOException If there is any error writing to the stream.
     */
    protected void endPage(PDPage page) throws IOException
    {
        // default is to do nothing
    }

    // Reset ("nothing seen yet") values for the bookkeeping variables of writePage().

    // initial value of endOfLastTextX; while it is still set, no expected word start is computed
    private static final float END_OF_LAST_TEXT_X_RESET_VALUE = -1;
    // value of maxYForLine at the start of a page and after each line break
    private static final float MAX_Y_FOR_LINE_RESET_VALUE = -Float.MAX_VALUE;
    // marks expectedStartOfNextWordX as unknown, which suppresses the word separator test
    private static final float EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE = -Float.MAX_VALUE;
    // value of maxHeightForLine at the start of a page and after each line break
    private static final float MAX_HEIGHT_FOR_LINE_RESET_VALUE = -1;
    // value of minYTopForLine at the start of a page and after each line break
    private static final float MIN_Y_TOP_FOR_LINE_RESET_VALUE = Float.MAX_VALUE;
    // initial value of lastWordSpacing; a negative value means there is no previous spacing to average with
    private static final float LAST_WORD_SPACING_RESET_VALUE = -1;

    /**
     * This will print the text of the processed page to "output". It will estimate, based on the coordinates of the
     * text, where newlines and word spacings should be placed. The text will be sorted only if that feature was
     * enabled.
     * <p>
     * For every article list in <code>charactersByArticle</code> the method optionally sorts the text positions,
     * calls {@link #startArticle()} and then queues the positions of one line at a time. The queued line is
     * normalized and written whenever a position does not vertically overlap the line collected so far. A word
     * separator item is queued when a position starts to the right of the expected start of the next word and the
     * previous text did not already end with a space. After the last position of an article the remaining line is
     * written, followed by the paragraph end, and the article is closed with {@link #endArticle()}.
     *
     * @throws IOException If there is an error writing the text.
     */
    protected void writePage() throws IOException
    {
        // NOTE: the trackers below are declared outside of the article loop, so
        // their values carry over from one article of the page to the next.
        float maxYForLine = MAX_Y_FOR_LINE_RESET_VALUE;
        // minYTopForLine is maintained for every line, but no decision in this
        // method reads it (see the "XXX BC" remark further down)
        float minYTopForLine = MIN_Y_TOP_FOR_LINE_RESET_VALUE;
        float endOfLastTextX = END_OF_LAST_TEXT_X_RESET_VALUE;
        float lastWordSpacing = LAST_WORD_SPACING_RESET_VALUE;
        float maxHeightForLine = MAX_HEIGHT_FOR_LINE_RESET_VALUE;
        PositionWrapper lastPosition = null;
        PositionWrapper lastLineStartPosition = null;

        boolean startOfPage = true; // flag to indicate start of page
        boolean startOfArticle;
        // the page start is only written when there is at least one article list;
        // the page end at the bottom of this method is written unconditionally
        if (charactersByArticle.size() > 0)
        {
            writePageStart();
        }

        for (List<TextPosition> textList : charactersByArticle)
        {
            if (getSortByPosition())
            {
                TextPositionComparator comparator = new TextPositionComparator();

                // because the TextPositionComparator is not transitive, but
                // JDK7+ enforces transitivity on comparators, we need to use
                // a custom quicksort implementation (which is slower, unfortunately).
                if (useCustomQuickSort)
                {
                    QuickSort.sort(textList, comparator);
                }
                else
                {
                    Collections.sort(textList, comparator);
                }
            }

            startArticle();
            startOfArticle = true;

            // Now cycle through to print the text.
            // We queue up a line at a time before we print so that we can convert
            // the line from presentation form to logical form (if needed).
            List<LineItem> line = new ArrayList<LineItem>();

            Iterator<TextPosition> textIter = textList.iterator();
            // PDF files don't always store spaces. We will need to guess where we should add
            // spaces based on the distances between TextPositions. Historically, this was done
            // based on the size of the space character provided by the font. In general, this
            // worked but there were cases where it did not work. Calculating the average character
            // width and using that as a metric works better in some cases but fails in some cases
            // where the spacing worked. So we use both. NOTE: Adobe reader also fails on some of
            // these examples.

            // Keeps track of the previous average character width
            float previousAveCharWidth = -1;
            while (textIter.hasNext())
            {
                TextPosition position = textIter.next();
                PositionWrapper current = new PositionWrapper(position);
                String characterValue = position.getUnicode();

                // Resets the average character width when we see a change in font
                // or a change in the font size
                if (lastPosition != null && (position.getFont() != lastPosition.getTextPosition()
                        .getFont()
                        || position.getFontSize() != lastPosition.getTextPosition().getFontSize()))
                {
                    previousAveCharWidth = -1;
                }

                float positionX;
                float positionY;
                float positionWidth;
                float positionHeight;

                // If we are sorting, then we need to use the text direction
                // adjusted coordinates, because they were used in the sorting.
                if (getSortByPosition())
                {
                    positionX = position.getXDirAdj();
                    positionY = position.getYDirAdj();
                    positionWidth = position.getWidthDirAdj();
                    positionHeight = position.getHeightDir();
                }
                else
                {
                    positionX = position.getX();
                    positionY = position.getY();
                    positionWidth = position.getWidth();
                    positionHeight = position.getHeight();
                }

                // The current amount of characters in a word
                int wordCharCount = position.getIndividualWidths().length;

                // Estimate the expected width of the space based on the
                // space character with some margin.
                float wordSpacing = position.getWidthOfSpace();
                float deltaSpace;
                if (wordSpacing == 0 || Float.isNaN(wordSpacing))
                {
                    // no usable space width: Math.min() below then always
                    // picks the estimate based on the average character width
                    deltaSpace = Float.MAX_VALUE;
                }
                else
                {
                    if (lastWordSpacing < 0)
                    {
                        deltaSpace = wordSpacing * getSpacingTolerance();
                    }
                    else
                    {
                        deltaSpace = (wordSpacing + lastWordSpacing) / 2f * getSpacingTolerance();
                    }
                }

                // Estimate the expected width of the space based on the average character width
                // with some margin. This calculation does not make a true average (average of
                // averages) but we found that it gave the best results after numerous experiments.
                // Based on experiments we also found that .3 worked well.
                float averageCharWidth;
                if (previousAveCharWidth < 0)
                {
                    averageCharWidth = positionWidth / wordCharCount;
                }
                else
                {
                    averageCharWidth = (previousAveCharWidth + positionWidth / wordCharCount) / 2f;
                }
                float deltaCharWidth = averageCharWidth * getAverageCharTolerance();

                // Compares the values obtained by the average method and the wordSpacing method
                // and picks the smaller number.
                float expectedStartOfNextWordX = EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE;
                if (endOfLastTextX != END_OF_LAST_TEXT_X_RESET_VALUE)
                {
                    expectedStartOfNextWordX = endOfLastTextX + Math.min(deltaSpace, deltaCharWidth);
                }

                if (lastPosition != null)
                {
                    if (startOfArticle)
                    {
                        lastPosition.setArticleStart();
                        startOfArticle = false;
                    }
                    // RDD - Here we determine whether this text object is on the current
                    // line. We use the lastBaselineFontSize to handle the superscript
                    // case, and the size of the current font to handle the subscript case.
                    // Text must overlap with the last rendered baseline text by at least
                    // a small amount in order to be considered as being on the same line.

                    // XXX BC: In theory, this check should really check if the next char is in
                    // full range seen in this line. This is what I tried to do with minYTopForLine,
                    // but this caused a lot of regression test failures. So, I'm leaving it be for
                    // now
                    if (!overlap(positionY, positionHeight, maxYForLine, maxHeightForLine))
                    {
                        // the position starts a new line: flush the queued line and
                        // reset the per-line trackers
                        writeLine(normalize(line));
                        line.clear();
                        lastLineStartPosition = handleLineSeparation(current, lastPosition,
                                lastLineStartPosition, maxHeightForLine);
                        expectedStartOfNextWordX = EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE;
                        maxYForLine = MAX_Y_FOR_LINE_RESET_VALUE;
                        maxHeightForLine = MAX_HEIGHT_FOR_LINE_RESET_VALUE;
                        minYTopForLine = MIN_Y_TOP_FOR_LINE_RESET_VALUE;
                    }
                    // test if our TextPosition starts after a new word would be expected to start
                    if (expectedStartOfNextWordX != EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE
                            && expectedStartOfNextWordX < positionX &&
                            // only bother adding a space if the last character was not a space
                            lastPosition.getTextPosition().getUnicode() != null
                            && !lastPosition.getTextPosition().getUnicode().endsWith(" "))
                    {
                        line.add(LineItem.getWordSeparator());
                    }
                }
                // remember the largest Y seen on the current line
                if (positionY >= maxYForLine)
                {
                    maxYForLine = positionY;
                }
                // RDD - endX is what PDF considers to be the x coordinate of the
                // end position of the text. We use it in computing our metrics below.
                endOfLastTextX = positionX + positionWidth;

                // add it to the list
                if (characterValue != null)
                {
                    if (startOfPage && lastPosition == null)
                    {
                        writeParagraphStart();// not sure this is correct for RTL?
                    }
                    line.add(new LineItem(position));
                }
                maxHeightForLine = Math.max(maxHeightForLine, positionHeight);
                minYTopForLine = Math.min(minYTopForLine, positionY - positionHeight);
                lastPosition = current;
                if (startOfPage)
                {
                    // the very first position of the page starts both a paragraph and a line
                    lastPosition.setParagraphStart();
                    lastPosition.setLineStart();
                    lastLineStartPosition = lastPosition;
                    startOfPage = false;
                }
                lastWordSpacing = wordSpacing;
                previousAveCharWidth = averageCharWidth;
            }
            // print the final line
            if (line.size() > 0)
            {
                writeLine(normalize(line));
                writeParagraphEnd();
            }
            endArticle();
        }
        writePageEnd();
    }

    /**
     * Tells whether two vertical extents are considered to be on the same line. Each extent is given by a Y value
     * and a height and covers the range from <code>y - height</code> to <code>y</code>.
     *
     * @param y1 The Y value of the first extent.
     * @param height1 The height of the first extent.
     * @param y2 The Y value of the second extent.
     * @param height2 The height of the second extent.
     * @return true if the Y values are within .1 of each other, or if one Y value lies inside the other extent.
     */
    private boolean overlap(float y1, float height1, float y2, float height2)
    {
        // practically the same Y value
        if (within(y1, y2, .1f))
        {
            return true;
        }
        // y2 lies inside the first extent
        if (y2 <= y1 && y2 >= y1 - height1)
        {
            return true;
        }
        // y1 lies inside the second extent
        return y1 <= y2 && y1 >= y2 - height2;
    }

    /**
     * Writes the current line separator, as returned by {@link #getLineSeparator()}, to the output.
     *
     * @throws IOException If there is a problem writing out the line separator to the document.
     */
    protected void writeLineSeparator() throws IOException
    {
        final String separator = getLineSeparator();
        output.write(separator);
    }

    /**
     * Writes the current word separator, as returned by {@link #getWordSeparator()}, to the output.
     *
     * @throws IOException If there is a problem writing out the word separator to the document.
     */
    protected void writeWordSeparator() throws IOException
    {
        final String separator = getWordSeparator();
        output.write(separator);
    }

    /**
     * Writes the Unicode string of a TextPosition to the output.
     *
     * @param text The text to write to the stream.
     * @throws IOException If there is an error when writing the text.
     */
    protected void writeCharacters(TextPosition text) throws IOException
    {
        final String characters = text.getUnicode();
        output.write(characters);
    }

    /**
     * Write a Java string to the output stream. This default implementation does not use the
     * <code>textPositions</code>; it only forwards the text to {@link #writeString(String)}.
     *
     * @param text The text to write to the stream.
     * @param textPositions The TextPositions belonging to the text.
     * @throws IOException If there is an error when writing the text.
     */
    protected void writeString(String text, List<TextPosition> textPositions) throws IOException
    {
        // the positions are only of interest to subclasses
        this.writeString(text);
    }

    /**
     * Writes the given Java string, unchanged, to the output.
     *
     * @param text The text to write to the stream.
     * @throws IOException If there is an error when writing the text.
     */
    protected void writeString(String text) throws IOException
    {
        this.output.write(text);
    }

    /**
     * This will determine if two floating point numbers are within a specified variance of each other. Both bounds
     * are exclusive.
     *
     * @param first The first number to compare to.
     * @param second The second number to compare to.
     * @param variance The allowed variance.
     * @return true if second lies strictly between first - variance and first + variance.
     */
    private boolean within(float first, float second, float variance)
    {
        final float upperBound = first + variance;
        final float lowerBound = first - variance;
        return second < upperBound && second > lowerBound;
    }

    /**
     * This will process a TextPosition object and add the text to the list of characters on a page. It takes care of
     * overlapping text.
     * <p>
     * Three steps are performed: an optional check that drops text which was already seen at (nearly) the same
     * position, the choice of the article list the text belongs to, and the merging of a diacritic with the
     * neighbouring text position.
     *
     * @param text The text to process.
     */
    @Override
    protected void processTextPosition(TextPosition text)
    {
        if (suppressDuplicateOverlappingText)
        {
            String glyphText = text.getUnicode();
            float glyphX = text.getX();
            float glyphY = text.getY();

            // all positions at which this very string has been shown so far: X -> set of Y
            TreeMap<Float, TreeSet<Float>> knownPositions = characterListMapping.get(glyphText);
            if (knownPositions == null)
            {
                knownPositions = new TreeMap<Float, TreeSet<Float>>();
                characterListMapping.put(glyphText, knownPositions);
            }

            // RDD - The tolerance decides whether text rendered later at almost the
            // same place counts as overwriting the earlier text.
            //
            // It is a fraction of the average character width, which copes with cases
            // where extreme amounts of padding are applied and then backed off (there
            // are files where the padding is on the order of 10x the character width
            // and the TJ just backs up after each character), and which leaves some
            // room for kerning.
            float tolerance = text.getWidth() / glyphText.length() / 3.0f;

            boolean alreadyShown = false;
            SortedMap<Float, TreeSet<Float>> closeInX = knownPositions.subMap(glyphX - tolerance,
                    glyphX + tolerance);
            for (TreeSet<Float> ysAtX : closeInX.values())
            {
                SortedSet<Float> closeInY = ysAtX.subSet(glyphY - tolerance, glyphY + tolerance);
                if (!closeInY.isEmpty())
                {
                    alreadyShown = true;
                    break;
                }
            }
            if (alreadyShown)
            {
                // duplicate of text that is already there: drop it
                return;
            }

            // first occurrence at this place: remember it
            TreeSet<Float> ysAtExactX = knownPositions.get(glyphX);
            if (ysAtExactX == null)
            {
                ysAtExactX = new TreeSet<Float>();
                knownPositions.put(glyphX, ysAtExactX);
            }
            ysAtExactX.add(glyphY);
        }

        // The character will be shown, so determine which article it belongs to.
        // Bead i owns list 2 * i + 1, the list 2 * i holds text in front of bead i.
        int containingIndex = -1;
        // NOTE: the first candidate is taken when the text is left of OR above the bead
        int firstLeftOrAboveIndex = -1;
        int firstLeftIndex = -1;
        int firstAboveIndex = -1;
        float x = text.getX();
        float y = text.getY();
        if (!shouldSeparateByBeads)
        {
            containingIndex = 0;
        }
        else
        {
            for (int i = 0; i < beadRectangles.size(); i++)
            {
                PDRectangle rect = beadRectangles.get(i);
                if (rect == null)
                {
                    // null entry: everything goes to the first list
                    containingIndex = 0;
                    break;
                }
                if (rect.contains(x, y))
                {
                    containingIndex = i * 2 + 1;
                    break;
                }
                if ((x < rect.getLowerLeftX() || y < rect.getUpperRightY())
                        && firstLeftOrAboveIndex == -1)
                {
                    firstLeftOrAboveIndex = i * 2;
                }
                else if (x < rect.getLowerLeftX() && firstLeftIndex == -1)
                {
                    firstLeftIndex = i * 2;
                }
                else if (y < rect.getUpperRightY() && firstAboveIndex == -1)
                {
                    firstAboveIndex = i * 2;
                }
            }
        }

        // take the first candidate that was found, in order of preference
        int articleDivisionIndex = containingIndex;
        if (articleDivisionIndex == -1)
        {
            articleDivisionIndex = firstLeftOrAboveIndex;
        }
        if (articleDivisionIndex == -1)
        {
            articleDivisionIndex = firstLeftIndex;
        }
        if (articleDivisionIndex == -1)
        {
            articleDivisionIndex = firstAboveIndex;
        }
        if (articleDivisionIndex == -1)
        {
            // nothing matched at all: use the last list
            articleDivisionIndex = charactersByArticle.size() - 1;
        }

        List<TextPosition> articleText = charactersByArticle.get(articleDivisionIndex);

        // In the wild, some PDF encoded documents put diacritics (accents on
        // top of characters) into a separate Tj element. When displaying them
        // graphically, the two chunks get overlaid. With text output though,
        // we need to do the overlay ourselves. The code below recombines the
        // diacritic with its associated character if the two are consecutive.
        if (articleText.isEmpty())
        {
            articleText.add(text);
            return;
        }

        // Test if we overlap the previous entry. This assumes that looking back
        // one TextPosition is enough to find what we are overlapping, which may
        // not always be true.
        int lastIndex = articleText.size() - 1;
        TextPosition previous = articleText.get(lastIndex);
        if (text.isDiacritic() && previous.contains(text))
        {
            previous.mergeDiacritic(text);
            return;
        }
        if (previous.isDiacritic() && text.contains(previous))
        {
            // The previous TextPosition was the diacritic: merge it into this
            // one and let this one take its place in the list.
            text.mergeDiacritic(previous);
            articleText.remove(articleText.size() - 1);
        }
        articleText.add(text);
    }

    /**
     * Returns the page on which the text extraction starts. Page numbers are 1-based: for a document with 5 pages,
     * a start page of 1 extracts all pages and a start page of 4 extracts pages 4 and 5. The default value is 1.
     *
     * @return Value of property startPage.
     */
    public int getStartPage()
    {
        return this.startPage;
    }

    /**
     * Sets the first page that this class will extract.
     *
     * @param startPageValue New value of 1-based startPage property.
     */
    public void setStartPage(int startPageValue)
    {
        this.startPage = startPageValue;
    }

    /**
     * Returns the last page that will be extracted, inclusive. For a document with 5 pages, an end page of 5
     * extracts the entire document and an end page of 2 extracts pages 1 and 2. This defaults to Integer.MAX_VALUE
     * such that all pages of the pdf will be extracted.
     *
     * @return Value of property endPage.
     */
    public int getEndPage()
    {
        return this.endPage;
    }

    /**
     * Sets the last page (inclusive) that this class will extract.
     *
     * @param endPageValue New value of 1-based endPage property.
     */
    public void setEndPage(int endPageValue)
    {
        this.endPage = endPageValue;
    }

    /**
     * Sets the line separator to use for the output text. If this method is never called, the line.separator
     * system property is used.
     *
     * @param separator The desired line separator string.
     */
    public void setLineSeparator(String separator)
    {
        this.lineSeparator = separator;
    }

    /**
     * Returns the line separator that is used for the output text.
     *
     * @return The desired line separator string.
     */
    public String getLineSeparator()
    {
        return this.lineSeparator;
    }

    /**
     * Returns the word separator that is used for the output text.
     *
     * @return The desired word separator string.
     */
    public String getWordSeparator()
    {
        return this.wordSeparator;
    }

    /**
     * Sets the word separator to use for the output text. The PDFBox text extraction algorithm outputs the word
     * separator when there is enough space between two words. By default a space character is used. If you need
     * an accurate count of the characters that are found in a PDF document, you might want to set the word
     * separator to the empty string.
     *
     * @param separator The desired word separator string.
     */
    public void setWordSeparator(String separator)
    {
        this.wordSeparator = separator;
    }

    /**
     * Tells whether text that duplicates already extracted text at nearly the same position is dropped.
     *
     * @return Returns the suppressDuplicateOverlappingText.
     */
    public boolean getSuppressDuplicateOverlappingText()
    {
        return this.suppressDuplicateOverlappingText;
    }

    /**
     * Returns the number of the page that is currently being processed.
     *
     * @return A 1 based number representing the current page.
     */
    protected int getCurrentPageNo()
    {
        return this.currentPageNo;
    }

    /**
     * Returns the writer that receives the extracted text.
     *
     * @return The stream that output is being written to.
     */
    protected Writer getOutput()
    {
        return this.output;
    }

    /**
     * Returns the text positions of the page, grouped by article. It is quite common that there is only a single
     * article. The outer list has one entry per article, and each inner list holds the TextPosition objects of that
     * article. The internal list itself is returned, not a copy.
     *
     * @return A double List of TextPositions for all text strings on the page.
     */
    protected List<List<TextPosition>> getCharactersByArticle()
    {
        return this.charactersByArticle;
    }

    /**
     * By default the text stripper will attempt to remove text that overlaps other text. Word paints the same
     * character several times in order to make it look bold. Setting this to false extracts all text, which means
     * that certain sections will be duplicated, but better performance will be noticed.
     *
     * @param suppressDuplicateOverlappingTextValue The suppressDuplicateOverlappingText to set.
     */
    public void setSuppressDuplicateOverlappingText(boolean suppressDuplicateOverlappingTextValue)
    {
        this.suppressDuplicateOverlappingText = suppressDuplicateOverlappingTextValue;
    }

    /**
     * Tells whether the text stripper groups the extracted text by beads.
     *
     * @return If the text will be grouped by beads.
     */
    public boolean getSeparateByBeads()
    {
        return this.shouldSeparateByBeads;
    }

    /**
     * Sets whether the text stripper should group the text output by a list of beads. The default value is true!
     *
     * @param aShouldSeparateByBeads The new grouping of beads.
     */
    public void setShouldSeparateByBeads(boolean aShouldSeparateByBeads)
    {
        this.shouldSeparateByBeads = aShouldSeparateByBeads;
    }

    /**
     * Get the bookmark where text extraction should end, inclusive. Default is null.
     *
     * @return The ending bookmark.
     */
    public PDOutlineItem getEndBookmark()
    {
        // plain accessor for the stored end bookmark
        return this.endBookmark;
    }

    /**
     * Set the bookmark where the text extraction should stop.
     *
     * @param aEndBookmark The ending bookmark.
     */
    public void setEndBookmark(PDOutlineItem aEndBookmark)
    {
        // stored without validation
        this.endBookmark = aEndBookmark;
    }

    /**
     * Get the bookmark where text extraction should start, inclusive. Default is null.
     *
     * @return The starting bookmark.
     */
    public PDOutlineItem getStartBookmark()
    {
        // plain accessor for the stored start bookmark
        return this.startBookmark;
    }

    /**
     * Set the bookmark where text extraction should start, inclusive.
     *
     * @param aStartBookmark The starting bookmark.
     */
    public void setStartBookmark(PDOutlineItem aStartBookmark)
    {
        // stored without validation
        this.startBookmark = aStartBookmark;
    }

    /**
     * This will tell if the text stripper should add some more text formatting.
     * 
     * @return true if some more text formatting will be added
     */
    public boolean getAddMoreFormatting()
    {
        // plain accessor for the addMoreFormatting flag
        return this.addMoreFormatting;
    }

    /**
     * Some additional text formatting will be added if addMoreFormatting is set to true. Default is false.
     * 
     * @param newAddMoreFormatting Tell PDFBox to add some more text formatting
     */
    public void setAddMoreFormatting(boolean newAddMoreFormatting)
    {
        // store the flag as given
        this.addMoreFormatting = newAddMoreFormatting;
    }

    /**
     * This will tell if the text stripper should sort the text tokens before writing to the stream.
     *
     * @return true If the text tokens will be sorted before being written.
     */
    public boolean getSortByPosition()
    {
        // plain accessor for the sortByPosition flag
        return this.sortByPosition;
    }

    /**
     * The order of the text tokens in a PDF file may not be the same as they appear visually on the screen. For
     * example, a PDF writer may write out all text by font, so all bold or larger text, then make a second pass and
     * write out the normal text.<br>
     * The default is to <b>not</b> sort by position.<br>
     * <br>
     * A PDF writer could choose to write each character in a different order. By default PDFBox does <b>not</b> sort
     * the text tokens before processing them due to performance reasons.
     *
     * @param newSortByPosition Tell PDFBox to sort the text positions.
     */
    public void setSortByPosition(boolean newSortByPosition)
    {
        // store the flag as given
        this.sortByPosition = newSortByPosition;
    }

    /**
     * Get the current space width-based tolerance value that is being used to estimate where spaces in text should be
     * added. Note that the default value for this has been determined from trial and error.
     * 
     * @return The current tolerance / scaling factor
     */
    public float getSpacingTolerance()
    {
        // plain accessor for the stored tolerance factor
        return this.spacingTolerance;
    }

    /**
     * Set the space width-based tolerance value that is used to estimate where spaces in text should be added. Note
     * that the default value for this has been determined from trial and error. Setting this value larger will reduce
     * the number of spaces added.
     * 
     * @param spacingToleranceValue tolerance / scaling factor to use
     */
    public void setSpacingTolerance(float spacingToleranceValue)
    {
        // stored without range checking
        this.spacingTolerance = spacingToleranceValue;
    }

    /**
     * Get the current character width-based tolerance value that is being used to estimate where spaces in text should
     * be added. Note that the default value for this has been determined from trial and error.
     * 
     * @return The current tolerance / scaling factor
     */
    public float getAverageCharTolerance()
    {
        // plain accessor for the stored tolerance factor
        return this.averageCharTolerance;
    }

    /**
     * Set the character width-based tolerance value that is used to estimate where spaces in text should be added. Note
     * that the default value for this has been determined from trial and error. Setting this value larger will reduce
     * the number of spaces added.
     * 
     * @param averageCharToleranceValue average tolerance / scaling factor to use
     */
    public void setAverageCharTolerance(float averageCharToleranceValue)
    {
        // stored without range checking
        this.averageCharTolerance = averageCharToleranceValue;
    }

    /**
     * returns the multiple of whitespace character widths for the current text which the current line start can be
     * indented from the previous line start beyond which the current line start is considered to be a paragraph start.
     * 
     * @return the number of whitespace character widths to use when detecting paragraph indents.
     */
    public float getIndentThreshold()
    {
        // read by isParagraphSeparation() as the space-width multiplier for indents
        return this.indentThreshold;
    }

    /**
     * sets the multiple of whitespace character widths for the current text which the current line start can be
     * indented from the previous line start beyond which the current line start is considered to be a paragraph start.
     * The default value is 2.0.
     *
     * @param indentThresholdValue the number of whitespace character widths to use when detecting paragraph indents.
     */
    public void setIndentThreshold(float indentThresholdValue)
    {
        // stored without range checking
        this.indentThreshold = indentThresholdValue;
    }

    /**
     * the minimum whitespace, as a multiple of the max height of the current characters beyond which the current line
     * start is considered to be a paragraph start.
     * 
     * @return the character height multiple for max allowed whitespace between lines in the same paragraph.
     */
    public float getDropThreshold()
    {
        // read by isParagraphSeparation() as the line-height multiplier for vertical gaps
        return this.dropThreshold;
    }

    /**
     * sets the minimum whitespace, as a multiple of the max height of the current characters beyond which the current
     * line start is considered to be a paragraph start. The default value is 2.5.
     *
     * @param dropThresholdValue the character height multiple for max allowed whitespace between lines in the same
     * paragraph.
     */
    public void setDropThreshold(float dropThresholdValue)
    {
        // stored without range checking
        this.dropThreshold = dropThresholdValue;
    }

    /**
     * Returns the string which will be used at the beginning of a paragraph.
     * 
     * @return the paragraph start string
     */
    public String getParagraphStart()
    {
        // this is the value writeParagraphStart() sends to the output
        return this.paragraphStart;
    }

    /**
     * Sets the string which will be used at the beginning of a paragraph.
     * 
     * @param s the paragraph start string
     */
    public void setParagraphStart(String s)
    {
        // stored as given; no null check is performed here
        this.paragraphStart = s;
    }

    /**
     * Returns the string which will be used at the end of a paragraph.
     * 
     * @return the paragraph end string
     */
    public String getParagraphEnd()
    {
        // this is the value writeParagraphEnd() sends to the output
        return this.paragraphEnd;
    }

    /**
     * Sets the string which will be used at the end of a paragraph.
     * 
     * @param s the paragraph end string
     */
    public void setParagraphEnd(String s)
    {
        // stored as given; no null check is performed here
        this.paragraphEnd = s;
    }

    /**
     * Returns the string which will be used at the beginning of a page.
     * 
     * @return the page start string
     */
    public String getPageStart()
    {
        // this is the value writePageStart() sends to the output
        return this.pageStart;
    }

    /**
     * Sets the string which will be used at the beginning of a page.
     * 
     * @param pageStartValue the page start string
     */
    public void setPageStart(String pageStartValue)
    {
        // stored as given; no null check is performed here
        this.pageStart = pageStartValue;
    }

    /**
     * Returns the string which will be used at the end of a page.
     * 
     * @return the page end string
     */
    public String getPageEnd()
    {
        // this is the value writePageEnd() sends to the output
        return this.pageEnd;
    }

    /**
     * Sets the string which will be used at the end of a page.
     * 
     * @param pageEndValue the page end string
     */
    public void setPageEnd(String pageEndValue)
    {
        // stored as given; no null check is performed here
        this.pageEnd = pageEndValue;
    }

    /**
     * Returns the string which will be used at the beginning of an article.
     * 
     * @return the article start string
     */
    public String getArticleStart()
    {
        // plain accessor for the stored article start string
        return this.articleStart;
    }

    /**
     * Sets the string which will be used at the beginning of an article.
     * 
     * @param articleStartValue the article start string
     */
    public void setArticleStart(String articleStartValue)
    {
        // stored as given; no null check is performed here
        this.articleStart = articleStartValue;
    }

    /**
     * Returns the string which will be used at the end of an article.
     * 
     * @return the article end string
     */
    public String getArticleEnd()
    {
        // plain accessor for the stored article end string
        return this.articleEnd;
    }

    /**
     * Sets the string which will be used at the end of an article.
     * 
     * @param articleEndValue the article end string
     */
    public void setArticleEnd(String articleEndValue)
    {
        // stored as given; no null check is performed here
        this.articleEnd = articleEndValue;
    }

    /**
     * handles the line separator for a new line given the specified current and previous TextPositions.
     * 
     * @param current the current text position
     * @param lastPosition the previous text position
     * @param lastLineStartPosition the last text position that followed a line separator.
     * @param maxHeightForLine max height for positions since lastLineStartPosition
     * @return start position of the last line
     * @throws IOException if something went wrong
     */
    private PositionWrapper handleLineSeparation(PositionWrapper current,
            PositionWrapper lastPosition, PositionWrapper lastLineStartPosition,
            float maxHeightForLine) throws IOException
    {
        // flag the position as a line start, then let the paragraph test set its flags
        current.setLineStart();
        isParagraphSeparation(current, lastPosition, lastLineStartPosition, maxHeightForLine);

        if (!current.isParagraphStart())
        {
            // ordinary line break inside the running paragraph
            writeLineSeparator();
            return current;
        }

        if (lastPosition.isArticleStart())
        {
            // the previous position opened an article: a line separator is only
            // written when that position was a line start as well
            if (lastPosition.isLineStart())
            {
                writeLineSeparator();
            }
            writeParagraphStart();
        }
        else
        {
            writeLineSeparator();
            writeParagraphSeparator();
        }

        // the current position is always handed back as the new line start
        return current;
    }

    /**
     * tests the relationship between the last text position, the current text position and the last text position that
     * followed a line separator to decide if the gap represents a paragraph separation. This should <i>only</i> be
     * called for consecutive text positions that first pass the line separation test.
     * <p>
     * This base implementation tests to see if the lastLineStartPosition is null OR if the current vertical position
     * has dropped below the last text vertical position by at least 2.5 times the current text height OR if the current
     * horizontal position is indented by at least 2 times the current width of a space character.
     * </p>
     * <p>
     * This also attempts to identify text that is indented under a hanging indent.
     * </p>
     * <p>
     * This method sets the isParagraphStart and isHangingIndent flags on the current position object.
     * </p>
     *
     * @param position the current text position. This may have its isParagraphStart or isHangingIndent flags set upon
     * return.
     * @param lastPosition the previous text position (should not be null).
     * @param lastLineStartPosition the last text position that followed a line separator, or null.
     * @param maxHeightForLine max height for text positions since lastLineStartPosition.
     */
    private void isParagraphSeparation(PositionWrapper position, PositionWrapper lastPosition,
            PositionWrapper lastLineStartPosition, float maxHeightForLine)
    {
        if (lastLineStartPosition == null)
        {
            // no earlier line start to compare with: treat as paragraph start
            position.setParagraphStart();
            return;
        }

        TextPosition currentText = position.getTextPosition();

        // vertical distance to the previous position versus the drop limit
        float verticalGap = Math.abs(currentText.getYDirAdj()
                - lastPosition.getTextPosition().getYDirAdj());
        float dropLimit = multiplyFloat(getDropThreshold(), maxHeightForLine);
        // do we need to flip this for rtl?
        float horizontalGap = currentText.getXDirAdj()
                - lastLineStartPosition.getTextPosition().getXDirAdj();
        float indentLimit = multiplyFloat(getIndentThreshold(), currentText.getWidthOfSpace());
        float quarterCharWidth = multiplyFloat(0.25f, currentText.getWidth());

        boolean startsParagraph = false;
        if (verticalGap > dropLimit)
        {
            startsParagraph = true;
        }
        else if (horizontalGap > indentLimit)
        {
            // indented relative to the last line start; when that line start opened
            // a paragraph the indent is recorded as a hanging indent instead
            if (lastLineStartPosition.isParagraphStart())
            {
                position.setHangingIndent();
            }
            else
            {
                startsParagraph = true;
            }
        }
        else if (horizontalGap < -currentText.getWidthOfSpace())
        {
            // text starts left of the previous line start
            startsParagraph = !lastLineStartPosition.isParagraphStart();
        }
        else if (Math.abs(horizontalGap) < quarterCharWidth)
        {
            // within a quarter of a character width of the last line start:
            // the two are treated as lined up
            if (lastLineStartPosition.isHangingIndent())
            {
                position.setHangingIndent();
            }
            else if (lastLineStartPosition.isParagraphStart())
            {
                // both lines must match the very same list item Pattern instance
                Pattern previousItem = matchListItemPattern(lastLineStartPosition);
                startsParagraph = previousItem != null
                        && previousItem == matchListItemPattern(position);
            }
        }

        if (startsParagraph)
        {
            position.setParagraphStart();
        }
    }

    private float multiplyFloat(float value1, float value2)
    {
        // multiply the two floats and round the product to 3 decimal places so that
        // later comparisons with other floats are not thrown off by tiny differences
        float scaledProduct = value1 * value2 * 1000;
        int roundedThousandths = Math.round(scaledProduct);
        return roundedThousandths / 1000f;
    }

    /**
     * writes the paragraph separator string to the output.
     * 
     * @throws IOException if something went wrong
     */
    protected void writeParagraphSeparator() throws IOException
    {
        // a separator is simply "end of paragraph" followed by "start of paragraph"
        this.writeParagraphEnd();
        this.writeParagraphStart();
    }

    /**
     * Write something (if defined) at the start of a paragraph.
     * 
     * @throws IOException if something went wrong
     */
    protected void writeParagraphStart() throws IOException
    {
        // a paragraph that is still open is ended first
        if (this.inParagraph)
        {
            this.writeParagraphEnd();
            this.inParagraph = false;
        }
        this.output.write(this.getParagraphStart());
        this.inParagraph = true;
    }

    /**
     * Write something (if defined) at the end of a paragraph.
     * 
     * @throws IOException if something went wrong
     */
    protected void writeParagraphEnd() throws IOException
    {
        // when no paragraph is open, one is started first so that start and end are paired
        if (!this.inParagraph)
        {
            this.writeParagraphStart();
        }
        this.output.write(this.getParagraphEnd());
        this.inParagraph = false;
    }

    /**
     * Write something (if defined) at the start of a page.
     * 
     * @throws IOException if something went wrong
     */
    protected void writePageStart() throws IOException
    {
        // emits whatever getPageStart() returns
        this.output.write(this.getPageStart());
    }

    /**
     * Write something (if defined) at the end of a page.
     * 
     * @throws IOException if something went wrong
     */
    protected void writePageEnd() throws IOException
    {
        // emits whatever getPageEnd() returns
        this.output.write(this.getPageEnd());
    }

    /**
     * returns the list item Pattern object that matches the text at the specified PositionWrapper or null if the text
     * does not match such a pattern. The list of Patterns tested against is given by the {@link #getListItemPatterns()}
     * method. To add to the list, simply override that method (if sub-classing) or explicitly supply your own list
     * using {@link #setListItemPatterns(List)}.
     * 
     * @param pw position
     * @return the matching pattern
     */
    private Pattern matchListItemPattern(PositionWrapper pw)
    {
        // match the unicode text of the wrapped position against the configured patterns
        return matchPattern(pw.getTextPosition().getUnicode(), getListItemPatterns());
    }

    /**
     * a list of regular expressions that match commonly used list item formats, i.e. bullets, numbers, letters, Roman
     * numerals, etc. Not meant to be comprehensive.
     */
    private static final String[] LIST_ITEM_EXPRESSIONS = {
            // a lone period
            "\\.",
            // digits followed by a period or closing parenthesis, or enclosed in brackets
            "\\d+\\.",
            "\\[\\d+\\]",
            "\\d+\\)",
            // a single upper or lower case letter followed by a period or closing parenthesis
            "[A-Z]\\.",
            "[a-z]\\.",
            "[A-Z]\\)",
            "[a-z]\\)",
            // upper or lower case Roman numerals (I, V, X, L only) followed by a period
            "[IVXL]+\\.",
            "[ivxl]+\\.",
    };

    // compiled lazily by getListItemPatterns() unless replaced through setListItemPatterns(List)
    private List<Pattern> listOfPatterns;

    /**
     * use to supply a different set of regular expression patterns for matching list item starts.
     *
     * @param patterns list of patterns
     */
    protected void setListItemPatterns(List<Pattern> patterns)
    {
        // the list is stored as is; passing null makes getListItemPatterns() rebuild the defaults
        this.listOfPatterns = patterns;
    }

    /**
     * returns a list of regular expression Patterns representing different common list item formats. For example
     * numbered items of form:
     * <ol>
     * <li>some text</li>
     * <li>more text</li>
     * </ol>
     * or
     * <ul>
     * <li>some text</li>
     * <li>more text</li>
     * </ul>
     * etc., all begin with some character pattern. The pattern "\\d+\." (matches "1.", "2.", ...) or "\[\\d+\]"
     * (matches "[1]", "[2]", ...).
     * <p>
     * This method returns a list of such regular expression Patterns.
     * 
     * @return a list of Pattern objects.
     */
    protected List<Pattern> getListItemPatterns()
    {
        if (listOfPatterns != null)
        {
            return listOfPatterns;
        }

        // first use: compile the default expressions in their declared order
        List<Pattern> compiled = new ArrayList<Pattern>();
        for (int i = 0; i < LIST_ITEM_EXPRESSIONS.length; i++)
        {
            compiled.add(Pattern.compile(LIST_ITEM_EXPRESSIONS[i]));
        }
        listOfPatterns = compiled;
        return compiled;
    }

    /**
     * iterates over the specified list of Patterns until it finds one that matches the specified string. Then returns
     * the Pattern.
     * <p>
     * Order of the supplied list of patterns is important as most common patterns should come first. Patterns should be
     * strict in general, and all will be used with case sensitivity on.
     * </p>
     * 
     * @param string the string to be searched
     * @param patterns list of patterns
     * @return matching pattern
     */
    protected static Pattern matchPattern(String string, List<Pattern> patterns)
    {
        // the first pattern that matches the whole string wins, so list order matters
        Pattern found = null;
        for (int i = 0; found == null && i < patterns.size(); i++)
        {
            Pattern candidate = patterns.get(i);
            if (candidate.matcher(string).matches())
            {
                found = candidate;
            }
        }
        return found;
    }

    /**
     * Write a list of strings containing a whole line of a document.
     * 
     * @param line a list with the words of the given line
     * @throws IOException if something went wrong
     */
    private void writeLine(List<WordWithTextPositions> line)
            throws IOException
    {
        // Iterate instead of calling line.get(i): the list may be a linked list, for which
        // indexed access makes the whole loop quadratic in the number of words.
        boolean firstWord = true;
        for (WordWithTextPositions word : line)
        {
            // a word separator goes between words only, never after the last one
            if (!firstWord)
            {
                writeWordSeparator();
            }
            writeString(word.getText(), word.getTextPositions());
            firstWord = false;
        }
    }

    /**
     * Normalize the given list of TextPositions.
     * 
     * @param line list of TextPositions
     * @return a list of strings, one string for every word
     */
    private List<WordWithTextPositions> normalize(List<LineItem> line)
    {
        // ArrayList rather than LinkedList: the result is only appended to here, and
        // index-based consumers of the returned list then get constant-time access.
        List<WordWithTextPositions> normalized = new ArrayList<WordWithTextPositions>();
        StringBuilder lineBuilder = new StringBuilder();
        List<TextPosition> wordPositions = new ArrayList<TextPosition>();

        for (LineItem item : line)
        {
            // normalizeAdd hands back a fresh builder after every word separator
            lineBuilder = normalizeAdd(normalized, lineBuilder, wordPositions, item);
        }

        // flush the trailing word that was not terminated by a separator
        if (lineBuilder.length() > 0)
        {
            normalized.add(createWord(lineBuilder.toString(), wordPositions));
        }
        return normalized;
    }

    /**
     * Handles the LTR and RTL direction of the given words. The whole implementation stands and falls with the given
     * word. If the word is a full line, the results will be the best. If the word consists of single words or
     * characters, the order of the characters in a word or words in a line may be wrong, due to RTL and LTR marks and
     * characters!
     * 
     * Based on http://www.nesterovsky-bros.com/weblog/2013/07/28/VisualToLogicalConversionInJava.aspx
     * 
     * @param word The word that shall be processed
     * @return new word with the correct direction of the containing characters
     */
    private String handleDirection(String word)
    {
        Bidi bidi = new Bidi(word, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT);

        // if there is pure LTR text no need to process further
        if (!bidi.isMixed() && bidi.getBaseLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT)
        {
            return word;
        }

        // collect individual bidi information
        int runCount = bidi.getRunCount();
        byte[] levels = new byte[runCount];
        Integer[] runs = new Integer[runCount];

        for (int i = 0; i < runCount; i++)
        {
            levels[i] = (byte) bidi.getRunLevel(i);
            runs[i] = i;
        }

        // reorder individual parts based on their levels; only "runs" is permuted,
        // "levels" stays indexed by the original run number
        Bidi.reorderVisually(levels, 0, runs, 0, runCount);

        // collect the parts based on the direction within the run
        StringBuilder result = new StringBuilder(word.length());

        for (int i = 0; i < runCount; i++)
        {
            int index = runs[i];
            int start = bidi.getRunStart(index);
            int end = bidi.getRunLimit(index);

            if ((levels[index] & 1) == 0)
            {
                // even level: left-to-right run, copy unchanged
                result.append(word, start, end);
                continue;
            }

            // odd level: right-to-left run, emit its characters back to front
            int pos = end;
            while (pos > start)
            {
                char character = word.charAt(pos - 1);

                // Keep a surrogate pair in high/low order; reversing it char by char
                // would produce an invalid UTF-16 sequence.
                if (Character.isLowSurrogate(character) && pos - 2 >= start
                        && Character.isHighSurrogate(word.charAt(pos - 2)))
                {
                    result.append(word, pos - 2, pos);
                    pos -= 2;
                    continue;
                }
                pos--;

                // replace mirrored characters by their counterpart when one is known
                Character mirrored = null;
                if (Character.isMirrored(character))
                {
                    mirrored = MIRRORING_CHAR_MAP.get(character);
                }
                if (mirrored != null)
                {
                    result.append(mirrored.charValue());
                }
                else
                {
                    result.append(character);
                }
            }
        }

        return result.toString();
    }

    /**
     * Maps a character to its mirrored counterpart. Filled once by the static initializer below from
     * BidiMirroring.txt; stays empty when that resource is missing or cannot be read.
     */
    private static final Map<Character, Character> MIRRORING_CHAR_MAP = new HashMap<Character, Character>();

    static
    {
        String path = "/org/apache/pdfbox/resources/text/BidiMirroring.txt";
        InputStream input = PDFTextStripper.class.getResourceAsStream(path);
        if (input == null)
        {
            // Missing resource: only warn. There is no stream to close in this case, so the
            // close() call must not be reached (it used to throw a NullPointerException here).
            LOG.warn("Could not find '" + path + "', mirroring char map will be empty: ");
        }
        else
        {
            try
            {
                parseBidiFile(input);
            }
            catch (IOException e)
            {
                LOG.warn("Could not parse BidiMirroring.txt, mirroring char map will be empty: "
                        + e.getMessage());
            }
            finally
            {
                try
                {
                    input.close();
                }
                catch (IOException e)
                {
                    LOG.error("Could not close BidiMirroring.txt ", e);
                }
            }
        }
    }

    /**
     * This method parses the bidi file provided as inputstream.
     * 
     * @param inputStream - The bidi file as inputstream
     * @throws IOException if any line could not be read by the LineNumberReader
     */
    private static void parseBidiFile(InputStream inputStream) throws IOException
    {
        // The data fields are hexadecimal ASCII, so decode with a fixed charset instead of
        // the platform default. The stream itself is closed by the caller.
        LineNumberReader rd = new LineNumberReader(new InputStreamReader(inputStream, "US-ASCII"));

        String line;
        while ((line = rd.readLine()) != null)
        {
            // everything after '#' is a comment
            int comment = line.indexOf('#');
            String content = comment == -1 ? line : line.substring(0, comment);

            // only lines of the form "<hex>; <hex>" define a mirroring pair; blank lines,
            // indented comments and lines with another field count are ignored
            StringTokenizer st = new StringTokenizer(content.trim(), ";");
            if (st.countTokens() != 2)
            {
                continue;
            }

            try
            {
                char source = (char) Integer.parseInt(st.nextToken().trim(), 16);
                char mirrored = (char) Integer.parseInt(st.nextToken().trim(), 16);
                MIRRORING_CHAR_MAP.put(source, mirrored);
            }
            catch (NumberFormatException e)
            {
                // skip a malformed entry rather than abort the whole load
                LOG.warn("Skipping malformed line " + rd.getLineNumber()
                        + " in BidiMirroring.txt: " + e.getMessage());
            }
        }
    }

    /**
     * Used within {@link #normalize(List)} to create a single {@link WordWithTextPositions} entry.
     */
    private WordWithTextPositions createWord(String word, List<TextPosition> wordPositions)
    {
        // the text is normalized first; the position list is passed through untouched
        String normalizedText = normalizeWord(word);
        return new WordWithTextPositions(normalizedText, wordPositions);
    }

    /**
     * Normalize certain Unicode characters. For example, convert the single "fi" ligature to "f" and "i". Also
     * normalises Arabic and Hebrew presentation forms.
     *
     * @param word Word to normalize
     * @return Normalized word
     */
    private String normalizeWord(String word)
    {
        final int length = word.length();
        // created lazily: stays null while no character needed normalizing
        StringBuilder normalized = null;
        // index of the first character not yet copied into the builder
        int copiedUpTo = 0;

        for (int index = 0; index < length; index++)
        {
            // We only normalize if the codepoint is in a given range.
            // Otherwise, NFKC converts too many things that would cause
            // confusion. For example, it converts the micro symbol in
            // extended Latin to the value in the Greek script. We normalize
            // the Unicode Alphabetic and Arabic A&B Presentation forms.
            char current = word.charAt(index);
            boolean inNormalizedRange = (0xFB00 <= current && current <= 0xFDFF)
                    || (0xFE70 <= current && current <= 0xFEFF);
            if (!inNormalizedRange)
            {
                continue;
            }

            if (normalized == null)
            {
                normalized = new StringBuilder(length * 2);
            }
            // copy the untouched stretch that precedes this character
            normalized.append(word, copiedUpTo, index);

            // Some fonts map U+FDF2 differently than the Unicode spec.
            // They add an extra U+0627 character to compensate.
            // This removes the extra character for those fonts.
            if (current == 0xFDF2 && index > 0
                    && (word.charAt(index - 1) == 0x0627 || word.charAt(index - 1) == 0xFE8D))
            {
                normalized.append("\u0644\u0644\u0647");
            }
            else
            {
                // Trim because some decompositions have an extra space, such as U+FC5E
                String decomposed = Normalizer.normalize(String.valueOf(current),
                        Normalizer.Form.NFKC);
                normalized.append(decomposed.trim());
            }
            copiedUpTo = index + 1;
        }

        if (normalized == null)
        {
            // nothing was replaced, so the original string is used directly
            return handleDirection(word);
        }
        normalized.append(word, copiedUpTo, length);
        return handleDirection(normalized.toString());
    }

    /**
     * Used within {@link #normalize(List)} to handle a {@link TextPosition}.
     * 
     * @return The StringBuilder that must be used when calling this method.
     */
    private StringBuilder normalizeAdd(List<WordWithTextPositions> normalized,
            StringBuilder lineBuilder, List<TextPosition> wordPositions, LineItem item)
    {
        if (!item.isWordSeparator())
        {
            // ordinary item: extend the word that is being collected
            TextPosition text = item.getTextPosition();
            lineBuilder.append(text.getUnicode());
            wordPositions.add(text);
            return lineBuilder;
        }

        // separator: close the collected word with its own copy of the positions,
        // then reset the shared position list and start over with a fresh builder
        List<TextPosition> positionsOfWord = new ArrayList<TextPosition>(wordPositions);
        normalized.add(createWord(lineBuilder.toString(), positionsOfWord));
        wordPositions.clear();
        return new StringBuilder();
    }

    /**
     * internal marker class. Used as a place holder in a line of TextPositions.
     */
    private static final class LineItem
    {
        /**
         * The single shared marker for a word separator. It wraps no text position. Declared final so
         * the shared instance can never be swapped out.
         */
        public static final LineItem WORD_SEPARATOR = new LineItem();

        /**
         * Returns the shared word separator marker.
         *
         * @return the {@link #WORD_SEPARATOR} instance
         */
        public static LineItem getWordSeparator()
        {
            return WORD_SEPARATOR;
        }

        // null for a word separator item
        private final TextPosition textPosition;

        /**
         * Creates a word separator item, i.e. an item without a text position.
         */
        private LineItem()
        {
            textPosition = null;
        }

        /**
         * Creates an item wrapping the given text position.
         *
         * @param textPosition the text position to wrap
         */
        LineItem(TextPosition textPosition)
        {
            this.textPosition = textPosition;
        }

        /**
         * Returns the wrapped text position.
         *
         * @return the text position, or null when no text position is wrapped
         */
        public TextPosition getTextPosition()
        {
            return textPosition;
        }

        /**
         * Tells whether this item is a word separator, which is the case when it wraps no text position.
         *
         * @return true if no text position is wrapped
         */
        public boolean isWordSeparator()
        {
            return textPosition == null;
        }
    }

    /**
     * Internal class that maps strings to lists of {@link TextPosition} arrays. Note that the number of entries in that
     * list may differ from the number of characters in the string due to normalization.
     *
     * @author Axel Dörfler
     */
    private static final class WordWithTextPositions
    {
        // the word text as handed to the constructor
        String text;
        // the positions handed to the constructor; the list is stored, not copied
        List<TextPosition> textPositions;

        /**
         * Pairs a word with its text positions.
         *
         * @param word the text of the word
         * @param positions the text positions belonging to the word
         */
        WordWithTextPositions(String word, List<TextPosition> positions)
        {
            this.text = word;
            this.textPositions = positions;
        }

        /**
         * @return the text of the word
         */
        public String getText()
        {
            return this.text;
        }

        /**
         * @return the text positions belonging to the word
         */
        public List<TextPosition> getTextPositions()
        {
            return this.textPositions;
        }
    }

    /**
     * wrapper of TextPosition that adds flags to track status as linestart and paragraph start positions.
     * <p>
     * This is implemented as a wrapper since the TextPosition class doesn't provide complete access to its state fields
     * to subclasses. Also, conceptually TextPosition is immutable while these flags need to be set post-creation so it
     * makes sense to put these flags in this separate class.
     * </p>
     * 
     * @author [email protected]
     */
    private static final class PositionWrapper
    {
        // all flags start out false and can only be switched on
        private boolean isLineStart;
        private boolean isParagraphStart;
        private boolean isPageBreak;
        private boolean isHangingIndent;
        private boolean isArticleStart;

        private TextPosition position;

        /**
         * Wraps the given TextPosition object.
         *
         * @param position the text position.
         */
        PositionWrapper(TextPosition position)
        {
            this.position = position;
        }

        /**
         * Returns the wrapped TextPosition object.
         *
         * @return the text position
         */
        public TextPosition getTextPosition()
        {
            return this.position;
        }

        /**
         * @return true once setLineStart() has been called
         */
        public boolean isLineStart()
        {
            return this.isLineStart;
        }

        /**
         * Switches the isLineStart() flag on.
         */
        public void setLineStart()
        {
            isLineStart = true;
        }

        /**
         * @return true once setParagraphStart() has been called
         */
        public boolean isParagraphStart()
        {
            return this.isParagraphStart;
        }

        /**
         * Switches the isParagraphStart() flag on.
         */
        public void setParagraphStart()
        {
            isParagraphStart = true;
        }

        /**
         * @return true once setArticleStart() has been called
         */
        public boolean isArticleStart()
        {
            return this.isArticleStart;
        }

        /**
         * Switches the isArticleStart() flag on.
         */
        public void setArticleStart()
        {
            isArticleStart = true;
        }

        /**
         * @return true once setPageBreak() has been called
         */
        public boolean isPageBreak()
        {
            return this.isPageBreak;
        }

        /**
         * Switches the isPageBreak() flag on.
         */
        public void setPageBreak()
        {
            isPageBreak = true;
        }

        /**
         * @return true once setHangingIndent() has been called
         */
        public boolean isHangingIndent()
        {
            return this.isHangingIndent;
        }

        /**
         * Switches the isHangingIndent() flag on.
         */
        public void setHangingIndent()
        {
            isHangingIndent = true;
        }
    }
}