java source code of PDF

pdfxtk-master
- checkstyle.xml
- pom.xml
- LICENSE
- lib
  - com
    - touchgraph
      - TGWikiBrowser
        1.0
- .gitattributes
- pdfXtk
  - src
    - main
      - resources
        PDFObjectExtractor.properties
        guiConfig.xml
        log4j.properties
      - java
        at
        ac
        tuwien
        dbai
        pdfwrap
        comparators
        XComparator.java
        YComparator.java
        EdgeAttributeComparator.java
        XYTextComparator.java
        EdgeLengthComparator.java
        pdfread
        PDFObjectExtractor.java
        PDFPage.java
        utils
        SegmentUtils.java
        Utils.java
        SerializationUtil.java
        ListUtils.java
        operator
        FillEvenOddRule.java
        ShowText.java
        SetStrokingColorSpace.java
        FillNonZeroRule.java
        AppendRectangleToPath.java
        CurveToReplicateFinalPoint.java
        SetStrokingRGBColor.java
        EndPath.java
        StrokePath.java
        ModifyClippingPath.java
        CurveToReplicateInitialPoint.java
        MoveTo.java
        SetStrokingCMYKColor.java
        GRestore.java
        CurveTo.java
        Invoke.java
        ShowTextGlyph.java
        BeginInlineImage.java
        SetNonStrokingColorSpace.java
        SetLineWidth.java
        FillNonZeroAndStrokePath.java
        SetNonStrokingRGBColor.java
        ClosePath.java
        GSave.java
        SetNonStrokingCMYKColor.java
        LineTo.java
        gui
        elements
        PageSpinner.java
        PDFPanel.java
        SelectionPanel.java
        CheckBoxList.java
        MainFrame.java
        layer
        Style.java
        Shapes.java
        StyledSegment.java
        exceptions
        UnknownShapeException.java
        tools
        XMLLayerLoader.java
        PDF_XMLSerializer.java
        OpenDocFileFilter.java
        MultiLineTooltip.java
        EdgeSegment.java
        analysis
        PageProcessor.java
        CandidateCluster.java
        LineProcessor.java
        SegmentationResult.java
        RulingObjectProcessor.java
        TextBlockPageSegmenter.java
        AbstractPageSegmenter.java
        exceptions
        DocumentProcessingException.java
        model
        graph
        AdjacencyEdge.java
        AdjacencyGraph.java
        document
        IXmillumSegment.java
        CompositeSegment.java
        LineSegment.java
        OpTuple.java
        CharSegment.java
        ImageSegment.java
        IBlankSegment.java
        AttributeTuple.java
        LineFragment.java
        TextLine.java
        RectSegment.java
        TextBlock.java
        TextSegment.java
        GenericSegment.java
        IXHTMLSegment.java
        TextFragment.java
        Page.java
        ProcessFile.java
        com
        javazoid
        functions
        FileFunctions.java
    - test
      - resources
        PDFObjectExtractor.properties
        log4j.properties
      - java
        at
        ac
        tuwien
        dbai
        pdfwrap
        comparators
        EdgeAttributeComparatorTest.java
        utils
        SegmentUtilsTest.java
  - gs.bat
  - pom.xml
  - graphwrap-example.sh
  - LICENSE
  - graphwrap.sh
  - .gitignore
  - process-file.sh
- pmd.xml
- graphwrap
  - xml
    - text.xsl
  - src
    - main
      - java
        at
        ac
        tuwien
        dbai
        pdfwrap
        GraphMatcher.java
        gui
        graphbrowser
        DocWrapperUI.java
        DocNavigateUI.java
        DocGBPanel.java
        displayable
        Block2.java
        Image2.java
        TextArea.java
        TextArea2.java
        GUI.java
        model
        graph
        WrappingInstance.java
        DocEdge.java
        DocGraphEltSet.java
        DocumentGraph.java
        package-info.java
        DocNode.java
        iiuf
        xmillum
        Displayable.java
        ElementTagger.java
        DisplayableAppearance.java
        StyleRegistry.java
        ActionHandlerParam.java
        ImageFactory.java
        xmillum.colors
        tool
        element.gif
        LabelWizard.java
        attribute.gif
        FlagOutput.java
        text.gif
        Hottable.java
        rsrc.swcp
        TextUpdate.java
        InfoWindow.java
        XMLTree.java
        Image.java
        ImageListener.java
        ToolFactory.java
        Parameter.java
        ParameterException.java
        XMIllumFrame.java
        Tool.java
        displayable
        Polygon.java
        Image.java
        Block.java
        Root.java
        TextArea.java
        BrowserContext.java
        IllumDocument.java
        JavaImageFactory.java
        BrowserPanel.java
        FlagAccess.java
        JAIImageFactory.java
        handlers
        Invalidate.java
        Output.java
        Select.java
        PopupFlagger.java
        LabelAttribute.java
        Split.java
        TextUpdate.java
        Info.java
        DocumentChangeEvent.java
        WindowCreator.java
        IllumException.java
        Style.java
        XMIllumDesktop.java
        ActionHandlerFactory.java
        StatusListener.java
        FlagListener.java
        Window.java
        AllFlagListener.java
        DisplayableFactory.java
        FlagManager.java
        DocumentChangeListener.java
        ActionHandler.java
        DisplayableClass.java
        IllumSource.java
        dom
        DOMContext.java
        Xerces.java
        Comparison.java
        DOMElementFactory.java
        DOMUtils.java
        DOMUtilsNS.java
        test
        DOM.java
        DOMable.java
        ElementList.java
        DefaultElement.java
        DOMManager.java
        DOMHandler.java
        ProjectX.java
        DOM.java
        swing
        TableMap.java
        LocatedIcon.java
        graph
        GeometryEditor.java
        AbstractPropertiesFactory.java
        GraphEdge.java
        GraphPanelEditor.java
        TreeNL.java
        AbstractNodeComponentFactory.java
        GraphRouter.java
        ForceDirectedNL.java
        GraphEdgeUtils.java
        EdgeMarker.java
        EdgeEditor.java
        GraphPanel.java
        EquilateralTriangleMarker.java
        NodeLayouter.java
        OrthogonalRouter.java
        StyleEditor.java
        DistanceOrderedNL.java
        GraphNodePort.java
        DefaultNL.java
        LabelMarker.java
        AbstractGraphEdgeFactory.java
        GraphNodeComponent.java
        ConnectingNode.java
        AbstractPortFactory.java
        StraightLineRouter.java
        DOM.java
        Resource.java
        TreeView.java
        AbstractPreview.java
        TableSorter.java
        Swing.java
        JWindowToolBarUI.java
        JInternalFrame.java
        AudioPreview.java
        MultiLineToolTip.java
        Preferences.java
        ContextMenuEnabled.java
        ChartPanel.java
        MultiColumnListUI.java
        ListTreeModel.java
        JTreeView.java
        SplitPaneTreeView.java
        ImagePreview.java
        JNumberField.java
        AutoExpandingJTree.java
        ContextMenu.java
        SimpleTreeModel.java
        propertiespanel
        Property.java
        TextField.java
        ComboBox.java
        StringCheckbox.java
        NumberField.java
        List.java
        ListAccess.java
        PropertiesPanel.java
        Group.java
        SubTreeModel.java
        PreviewFileChooser.java
        CheckBoxList.java
        VerticalLabelUI.java
        HexagonalBorder.java
        MemoryMonitor.java
        rsrc.swcp
        rsrc
        align_dright.gif
        forward.gif
        backward.gif
        loc_ne.gif
        align_right.gif
        align_bottom.gif
        align_dbottom.gif
        loc_se.gif
        red-ball-small.gif
        align_dvcenter.gif
        not_pencil.gif
        align_south.gif
        align_centerh.gif
        align_north_sel.gif
        loc_w.gif
        arc.gif
        align_east.gif
        open.gif
        align_hcenter.gif
        pencil.gif
        two_color.gif
        align_dleft.gif
        loc_nw.gif
        rot270.gif
        loc_n.gif
        to_back.gif
        one_color.gif
        loc_s.gif
        align_vcenter.gif
        align_dsv.gif
        align_east_sel.gif
        loc_c.gif
        align_dhcenter.gif
        align_south_sel.gif
        yellow-ball-small.gif
        thread.gif
        align_centerv.gif
        align_west.gif
        align_west_sel.gif
        align_centerh_sel.gif
        align_left.gif
        align_dsh.gif
        rot180.gif
        loc_sw.gif
        align_dtop.gif
        align_centerv_sel.gif
        to_front.gif
        stop.gif
        loc_e.gif
        rot90.gif
        align_top.gif
        info.gif
        green-ball-small.gif
        play.gif
        align_north.gif
        rightarrow.gif
        ProgressMonitor.java
        SetSelectionModel.java
        ChooserTreeView.java
        ContextMenuManager.java
        ButtonTreePathView.java
        DOM.java
        TreeViewTest.java
        log
        Server.java
        Overview.java
        Client.java
        LogListener.java
        prio2.gif
        doc.gif
        LogMessageListener.java
        Stdout.java
        prio7.gif
        Log.java
        prio4.gif
        prio0.gif
        clear.gif
        Const.java
        prio5.gif
        prio6.gif
        LogTableModel.java
        rsrc.swcp
        prio3.gif
        List.java
        prio1.gif
        LogMessage.java
        util
        UTMacOS.java
        DirTree.java
        TreeNode.java
        ClusteringQueue.java
        UTTeX.java
        PrefReanimator.java
        Unicode.java
        UnicodeTranslator.java
        AttributeFactory.java
        graph
        DefaultGraphEdge.java
        GraphException.java
        GraphEdge.java
        GraphWalk.java
        DefaultGraphModel.java
        GraphPort.java
        GraphPortListener.java
        Utilities.java
        GraphModelListener.java
        DefaultGraphNode.java
        GraphNode.java
        GraphModel.java
        DefaultGraphPort.java
        AbstractGraphModel.java
        Path.java
        DOM.java
        UTHTML.java
        UnicodeTrans.java
        EventListenerList.java
        Attributable.java
        PrefWatcher.java
        PrefNamer.java
        CacheArray.java
        Preferences.java
        FilePreferences.java
        BinaryTree.java
        StopWatch.java
        BinaryTreeNode.java
        TreeWalk.java
        NotImplementedException.java
        Trans.java
        Tree.java
        Queue.java
        ProgressWatcher.java
        ListParser.java
        AsyncAccelerator.java
        RectMapFilter.java
        RectMap.java
        Base64Encoder.java
        DefaultAttributable.java
        RedBlackTree.java
        CacheArrayBackEnd.java
        ProgressListener.java
        PrefPropertyWatcher.java
        AsyncInvocation.java
        Timer.java
        NestedException.java
        DBPreferences.java
        Util.java
        PreferencesStore.java
        Crypt.java
        AppletPreferences.java
        SysPreferences.java
        Base64FormatException.java
        Base64Decoder.java
        PreferencesHandler.java
        Strings.java
        DOM.java
        CopyThread.java
        RedBlackNode.java
        awt
        ProgressBar.java
        WaitBar.java
        AppletFrame.java
        StringRequester.java
        DateChooser.java
        VNav.java
        MultiLineLabel.java
        SplashScreen.java
        BorderLayout.java
        TableView.java
        InfoList.java
        Awt.java
        LoginRequester.java
        Frame.java
        Applet.java
        Requester.java
        HLine.java
        rsrc.swcp
        rsrc
        stop_crsr.gif
        HNav.java
        FindListener.java
        Dialog.java
        Find.java
        DOM.java
        jai
        DisplayRect.java
        HilightImagePanel.java
        BlackOrDescriptor.java
        BinarizeOpImage.java
        CCDescriptor.java
        FolderImageStorage.java
        CCOpImage.java
        DisplayImagePanel.java
        ImageStorage.java
        BlackOrOpImage.java
        HistogramPanel.java
        SkeletonDescriptor.java
        ProjectionProfileOpImage.java
        test
        ImageStorageTest.java
        Scale.java
        ConnectedComponents.java
        ProjectionProfiles.java
        RandomizeOpImage.java
        RandomizeDescriptor.java
        ImageViewer.java
        ProjectionProfileDescriptor.java
        DirectRasterAccessor.java
        RLSADescriptor.java
        JAITest.java
        RectDisplayImageLayer.java
        DisplayImageLayer.java
        LUV_ColorSpace.java
        Util.java
        PowerOpImage.java
        ProjectionProfile.java
        BinarizeDescriptor.java
        DisplayImage.java
        RLSAOpImage.java
        ImageStorageException.java
        PowerDescriptor.java
        SkeletonOpImage.java
        com
        touchgraph
        graphlayout
        TGPanel.java
        Edge.java
        Node.java
        interaction
        LocalityScroll.java
        DragMultiselectUI.java
        TGAbstractClickUI.java
        TGAbstractMousePausedUI.java
        DragAddUI.java
        HyperScroll.java
        GLNavigateUI.java
        DragNodeUI.java
        TGAbstractMouseMotionUI.java
        HVScroll.java
        TGUIManager.java
        ZoomScroll.java
        TGUserInterface.java
        TGAbstractDragUI.java
        GLEditUI.java
        TGSelfDeactivatingUI.java
        RotateScroll.java
        HVRotateDragUI.java
  - pom.xml
  - LICENSE
  - lib
  - gui.sh
  - .gitignore
- README.md
- .gitignore

package at.ac.tuwien.dbai.pdfwrap.gui.tools;

import at.ac.tuwien.dbai.pdfwrap.model.document.*;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Class for (de-)serializing the PDF analysis output.
 * <p>
 * The XML written by {@link #serialize(String, List, File)} has a
 * {@code PDFDocument} root element carrying the PDF location in its
 * {@code path} attribute, one {@code page} child per page and one
 * {@code segment} child per page item. Attributes of pages and segments are
 * copied verbatim from the {@link AttributeTuple}s the model objects expose.
 * <p>
 * When loading, the page attributes {@code x1}, {@code x2}, {@code y1},
 * {@code y2} and {@code pageNo} as well as the segment attributes
 * {@code type}, {@code x1}, {@code x2}, {@code y1} and {@code y2} are required.
 * 
 * @author Timo Schleicher
 *
 */
public class PDF_XMLSerializer {

	/** File extension of the serialized analysis result. */
	private static final String XML_EXTENSION = ".xml";

	/** File extension of the analysed source document. */
	private static final String PDF_EXTENSION = ".pdf";

	/** Parser feature that rejects any DOCTYPE declaration (prevents XXE and entity expansion attacks). */
	private static final String DISALLOW_DOCTYPE_FEATURE = "http://apache.org/xml/features/disallow-doctype-decl";

	/**
	 * Serializes the result of a PDF analysis to XML.
	 * 
	 * @param savePath The path where you want to save the XML; the extension
	 *                 ".xml" is appended if it is not already present (case-insensitive)
	 * @param pdfOutput The output of the PDF analysis process
	 * @param curFile The current file either the PDF itself or a XML file
	 *                that was previously written by this method
	 * @throws ParserConfigurationException if no XML document builder can be created
	 * @throws TransformerException if writing the XML file fails
	 * @throws IOException if {@code curFile} is a XML file that can not be read or does not
	 *                     name a PDF, or (as {@link FileNotFoundException}) if the
	 *                     referenced PDF file does not exist
	 * @throws SAXException if {@code curFile} is a XML file that is not well-formed
	 */
	public static void serialize(String savePath, List<Page> pdfOutput, File curFile) throws ParserConfigurationException, TransformerException, SAXException, IOException {
		
		//Take care of the file extension - maybe we need to add a XML extension
		String targetPath = hasExtension(savePath, XML_EXTENSION) ? savePath : savePath.concat(XML_EXTENSION);
		
		//Determine the path of the PDF file.
		//If it is already a PDF just take its location otherwise get the path from the XML file.
		String pdfPath = hasExtension(curFile.getPath(), PDF_EXTENSION) ? curFile.getPath() :
						  PDF_XMLSerializer.getPDFPath(curFile);
		
		//Check whether the file was not deleted or moved during the process
		if (!new File(pdfPath).exists()) {
			
			throw new FileNotFoundException("Can not find the following file:\n" + pdfPath);
		}
		
		//Nothing is parsed here, so a plain builder is sufficient for creating the empty document
		DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
		DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
 
		//Create the root element
		Document doc = docBuilder.newDocument();
		Element rootElement = doc.createElement("PDFDocument");
		
		rootElement.setAttribute("path", pdfPath);
		doc.appendChild(rootElement);
				
		for (Page page : pdfOutput) {
			
			Element pageE = doc.createElement("page");
			
			//Serialize all page attributes
			for (AttributeTuple attr : page.getAttributes()) {
				
				pageE.setAttribute(attr.getAttributeName(), attr.getAttributeValue());
			}
			
			rootElement.appendChild(pageE);
			
			//Serialize all segment attributes
			for (GenericSegment seg : page.getItems()) {
				
				Element segment = doc.createElement("segment");
				
				for (AttributeTuple attr : seg.getAttributes()) {
					
					segment.setAttribute(attr.getAttributeName(), attr.getAttributeValue());
				}
				
				pageE.appendChild(segment);
			}
		}
		
		//Write the content into XML file
		TransformerFactory transformerFactory = TransformerFactory.newInstance();
		Transformer transformer = transformerFactory.newTransformer();
		DOMSource source = new DOMSource(doc);
		StreamResult result = new StreamResult(new File(targetPath));
		
		//Styling of the XML file with indenting lines
		transformer.setOutputProperty(OutputKeys.INDENT, "yes");
		transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
		
		transformer.transform(source, result);
	}
	
	/**
	 * Load a previous analysis result from a XML file.
	 * <p>
	 * A segment that can not be converted (unknown type, missing or malformed
	 * attribute) is reported on standard error and skipped, the remaining
	 * segments of the page are still loaded. A page with missing or malformed
	 * attributes aborts loading with an {@link IOException}.
	 * 
	 * @param xml The XML file you want to load
	 * @return A list of Page objects, in document order
	 * @throws ParserConfigurationException if no XML parser can be created
	 * @throws SAXException if the file is not well-formed or contains a DOCTYPE declaration
	 * @throws IOException if the file can not be read or a page element lacks a valid
	 *                     {@code x1}, {@code x2}, {@code y1}, {@code y2} or {@code pageNo} attribute
	 */
	public static List<Page> deserializeAnalysis(File xml) throws ParserConfigurationException, SAXException, IOException {

		List<Page> pages = new ArrayList<Page>();
		
		Document doc = newHardenedDocumentBuilder().parse(xml);
		doc.getDocumentElement().normalize();
		
		//Get all the page elements
		NodeList xmlPages = doc.getElementsByTagName("page");
		
		for (int i = 0; i < xmlPages.getLength(); i++) {
			
			Element xmlPage = (Element) xmlPages.item(i);
				
			//Get the page attributes
			float x1 = floatAttribute(xmlPage, "x1");
			float x2 = floatAttribute(xmlPage, "x2");
			float y1 = floatAttribute(xmlPage, "y1");
			float y2 = floatAttribute(xmlPage, "y2");
			
			int pageNumber = intAttribute(xmlPage, "pageNo");
			
			//Get all the segments of a page element
			NodeList xmlSegments = xmlPage.getElementsByTagName("segment");
			
			ArrayList<GenericSegment> segList = new ArrayList<GenericSegment>();
			
			for (int k = 0; k < xmlSegments.getLength(); k++) {
				
				Element xmlSegment = (Element) xmlSegments.item(k);
			
				try {
				
					segList.add(attrToSegment(xmlSegment));
					
				} catch (IOException e) {
					
					//Deliberately best effort: one unreadable segment must not prevent
					//the rest of the analysis result from being loaded
					e.printStackTrace();
				}
			}
			
			Page page = new Page(x1, x2, y1, y2, segList);
			page.setPageNo(pageNumber);
			
			pages.add(page);
		}
		
		return pages;
	}
	
	/**
	 * Method for loading a segment from a XML node. Make sure to correctly
	 * outline each possible segment within this method in order to get a proper loading of the segments.
	 * 
	 * @param node The node of the XML structure
	 * @return A GenericSegment loaded from the XML node
	 * @throws IOException if the segment type is unknown or a required numeric
	 *                     attribute is missing or malformed
	 */
	private static GenericSegment attrToSegment(Element node) throws IOException {
		
		//Get the type of the segment as well as the coordinates
		String type = node.getAttribute("type");
		
		float x1 = floatAttribute(node, "x1");
		float x2 = floatAttribute(node, "x2");
		float y1 = floatAttribute(node, "y1");
		float y2 = floatAttribute(node, "y2");
		
		//Find the corresponding segment -> add new segment types here in order to properly load them after saving
		switch (type) {
		
		case "line-segment":
			
			return new LineSegment(x1, x2, y1, y2);

		case "rect-segment":
			
			return new RectSegment(x1, x2, y1, y2);
			
		case "filled-rect":
			
			//A filled rectangle is an ordinary rectangle segment with the filled flag set
			RectSegment filledRect = new RectSegment(x1, x2, y1, y2);
			filledRect.setFilled(true);
			return filledRect;
	
		case "text-block":
			
			String text = node.getAttribute("text");
			String fontName = node.getAttribute("font");
			float fontSize = floatAttribute(node, "fontsize");
			
			return new TextBlock(x1, x2, y1, y2, text, fontName, fontSize);
			
		case "image-segment":
			
			return new ImageSegment(x1, x2, y1, y2);
			
		default:
			throw new IOException("Unknown segment. Please specify: " + type);
		}
	}
	
	/**
	 * Get the path of the PDF file if working with a XML file.
	 * 
	 * @param xml The XML file
	 * @return The path of the PDF document that corresponds to the content of the XML file,
	 *         i.e. the {@code path} attribute of the first {@code PDFDocument} element
	 *         (an empty string if that attribute is absent)
	 * @throws ParserConfigurationException if no XML parser can be created
	 * @throws SAXException if the file is not well-formed or contains a DOCTYPE declaration
	 * @throws IOException if the file can not be read or contains no {@code PDFDocument} element
	 */
	public static String getPDFPath(File xml) throws ParserConfigurationException, SAXException, IOException {
		
		Document doc = newHardenedDocumentBuilder().parse(xml);
		doc.getDocumentElement().normalize();
		
		//Get the root element; item(0) yields null when the list is empty
		NodeList nList = doc.getElementsByTagName("PDFDocument");
		Node nNode = nList.item(0);
		
		if (nNode == null) {
			
			throw new IOException("No PDFDocument element found in the following file:\n" + xml.getPath());
		}
		
		return ((Element) nNode).getAttribute("path");
	}
	
	/**
	 * Creates a document builder for parsing files that were selected by the user.
	 * DOCTYPE declarations, XInclude processing and entity expansion are switched
	 * off so that a crafted file can neither read local files nor trigger
	 * external requests (XXE). Files written by {@link #serialize} never
	 * contain a DOCTYPE and are therefore not affected.
	 * 
	 * @return A document builder with external entity processing disabled
	 * @throws ParserConfigurationException if the parser can not be configured accordingly
	 */
	private static DocumentBuilder newHardenedDocumentBuilder() throws ParserConfigurationException {
		
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		
		factory.setFeature(DISALLOW_DOCTYPE_FEATURE, true);
		factory.setXIncludeAware(false);
		factory.setExpandEntityReferences(false);
		
		return factory.newDocumentBuilder();
	}
	
	/**
	 * Checks whether a path ends with the given extension, ignoring case.
	 * The comparison does not depend on the default locale.
	 * 
	 * @param path The path to inspect
	 * @param extension The extension including the leading dot
	 * @return true if the path ends with the extension
	 */
	private static boolean hasExtension(String path, String extension) {
		
		int start = path.length() - extension.length();
		
		return start >= 0 && path.regionMatches(true, start, extension, 0, extension.length());
	}
	
	/**
	 * Reads a float attribute of an element.
	 * 
	 * @param element The element carrying the attribute
	 * @param name The name of the attribute
	 * @return The attribute value as float
	 * @throws IOException if the attribute is missing or not a valid float
	 */
	private static float floatAttribute(Element element, String name) throws IOException {
		
		//getAttribute returns an empty string for a missing attribute
		String raw = element.getAttribute(name);
		
		try {
			
			return Float.parseFloat(raw);
			
		} catch (NumberFormatException e) {
			
			throw new IOException("Missing or invalid attribute '" + name + "' in element '"
					+ element.getTagName() + "': \"" + raw + "\"", e);
		}
	}
	
	/**
	 * Reads an integer attribute of an element.
	 * 
	 * @param element The element carrying the attribute
	 * @param name The name of the attribute
	 * @return The attribute value as int
	 * @throws IOException if the attribute is missing or not a valid integer
	 */
	private static int intAttribute(Element element, String name) throws IOException {
		
		//getAttribute returns an empty string for a missing attribute
		String raw = element.getAttribute(name);
		
		try {
			
			return Integer.parseInt(raw);
			
		} catch (NumberFormatException e) {
			
			throw new IOException("Missing or invalid attribute '" + name + "' in element '"
					+ element.getTagName() + "': \"" + raw + "\"", e);
		}
	}
}