/////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ////////////////////////////////////////////////////////////////////////////// //Program which takes in the /tmp/temp.xml file generated and forms a morph.xml file package opennlp.ccgbank.extract; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.sax.SAXResult; import javax.xml.transform.sax.SAXSource; import javax.xml.transform.sax.SAXTransformerFactory; import opennlp.ccgbank.extract.ExtractGrammar.ExtractionProperties; import org.apache.xml.serializer.OutputPropertiesFactory; import org.apache.xml.serializer.Serializer; import org.apache.xml.serializer.SerializerFactory; import org.jdom.JDOMException; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLFilter; import org.xml.sax.XMLReader; import org.xml.sax.helpers.XMLReaderFactory; public class MorphExtract { public static void extractMorph(ExtractionProperties extractProps) throws TransformerException, TransformerConfigurationException, SAXException, IOException, JDOMException { System.out.println("Extracting morph:"); System.out.println("Generating morph.xml"); TransformerFactory tFactory = TransformerFactory.newInstance(); File morphFile = new File(new File(extractProps.destDir), "morph.xml"); File tempFile = new File(new File(extractProps.tempDir), "temp.xml"); if (tFactory.getFeature(SAXSource.FEATURE) && tFactory.getFeature(SAXResult.FEATURE)) { SAXTransformerFactory saxTFactory = ((SAXTransformerFactory) tFactory); ArrayList<XMLFilter> filterChain = new ArrayList<XMLFilter>(); ArrayList<String> xslChain = new ArrayList<String>(); if (extractProps.macroSpecs.length() > 0) { } addTransforms(xslChain, extractProps.macroSpecs); for (String xslFile : xslChain) filterChain.add(saxTFactory.newXMLFilter(ExtractGrammar .getSource(xslFile))); // Create an XMLReader and set first xsl transform to that. XMLReader reader = XMLReaderFactory.createXMLReader(); XMLFilter xmlFilter0 = filterChain.get(0); xmlFilter0.setParent(reader); //Create chain of xsl transforms // Create an XMLFilter for each stylesheet. for (int i = 1; i < filterChain.size(); i++) { XMLFilter xmlFilterPrev = filterChain.get(i - 1); XMLFilter xmlFilterCurr = filterChain.get(i); xmlFilterCurr.setParent(xmlFilterPrev); } XMLFilter xmlFilter = filterChain.get(filterChain.size() - 1); java.util.Properties xmlProps = OutputPropertiesFactory .getDefaultMethodProperties("xml"); xmlProps.setProperty("indent", "yes"); xmlProps.setProperty("standalone", "no"); xmlProps.setProperty("{http://xml.apache.org/xalan}indent-amount", "2"); Serializer serializer = SerializerFactory.getSerializer(xmlProps); serializer.setOutputStream(new FileOutputStream(morphFile)); //XMLFilter xmlFilter = xmlFilter2; //XMLFilter xmlFilter = xmlFilter3; xmlFilter.setContentHandler(serializer.asContentHandler()); xmlFilter.parse(new InputSource(tempFile.getPath())); } //Deleting the temporary lex file //tempFile.delete(); } public static void addTransforms(ArrayList<String> xslChain, String macroSpecs) { xslChain.add("opennlp.ccgbank/transform/morphExtr.xsl"); if (macroSpecs.length() == 0) xslChain.add("opennlp.ccgbank/transform/macroInsert.xsl"); if (macroSpecs.contains("agr")) { System.out .println("Inserting a macro to check agreement in the copula"); xslChain.add("opennlp.ccgbank/transform/agr-macroInsert.xsl"); } if (macroSpecs.contains("anim")) { System.out .println("Inserting a macro to check animacy constraints"); xslChain.add("opennlp.ccgbank/transform/anim-macroInsert.xsl"); } } }