Java Code Examples for edu.stanford.nlp.util.StringUtils#argsToProperties()

The following examples show how to use edu.stanford.nlp.util.StringUtils#argsToProperties(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JavaReExTest.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Demo entry point: annotates one hard-coded sentence with a
 * {@code StanfordCoreNLP} pipeline built by its no-argument constructor,
 * runs a {@code RelationExtractorAnnotator} over the result and prints the
 * relation mentions attached to each sentence.
 *
 * @param args command-line arguments; converted to the properties handed to
 *             the relation extractor
 */
public static void main(String[] args) {
  try {
    final Properties extractorProps = StringUtils.argsToProperties(args);
    // Disabled override of the annotator list:
    // extractorProps.setProperty("annotators", "tokenize,ssplit,lemma,pos,parse,ner");
    final StanfordCoreNLP corePipeline = new StanfordCoreNLP();
    final Annotation document =
        new Annotation("John Gerspach was named Chief Financial Officer of Citi in July 2009.");
    corePipeline.annotate(document);

    final RelationExtractorAnnotator extractor = new RelationExtractorAnnotator(extractorProps);
    extractor.annotate(document);

    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
      System.out.println("For sentence " + sentence.get(CoreAnnotations.TextAnnotation.class));
      final List<RelationMention> mentions = sentence.get(RelationMentionsAnnotation.class);
      for (RelationMention mention : mentions) {
        System.out.println(mention.toString());
      }
    }
  } catch (Exception e) {
    // Any failure is only reported; the method then returns normally.
    e.printStackTrace();
  }
}
 
Example 2
Source File: SpanishPostprocessor.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Command-line entry point for training and evaluating the Spanish
 * postprocessor.
 *
 * @param args command-line options; an empty argument list or a
 *             {@code help} option prints the usage message and exits
 */
public static void main(String[] args) {
  // Option names arrive here with their leading hyphens already removed.
  final Properties options = StringUtils.argsToProperties(args, optionArgDefs());
  final boolean usageRequested = options.containsKey("help") || args.length == 0;
  if (usageRequested) {
    System.err.println(usage(SpanishPostprocessor.class.getName()));
    System.exit(-1);
  }

  final int threadCount = PropertiesUtils.getInt(options, "nthreads", 1);
  final SpanishPreprocessor preprocessor = new SpanishPreprocessor();
  final SpanishPostprocessor postprocessor = new SpanishPostprocessor(options);

  CRFPostprocessor.setup(postprocessor, preprocessor, options);
  CRFPostprocessor.execute(threadCount, preprocessor, postprocessor);
}
 
Example 3
Source File: EnglishPostprocessor.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Command-line entry point for training and evaluating the English
 * postprocessor.
 *
 * @param args command-line options; an empty argument list or a
 *             {@code help} option prints the usage message and exits
 */
public static void main(String[] args) {
  // Option names arrive here with their leading hyphens already removed.
  final Properties options = StringUtils.argsToProperties(args, optionArgDefs());
  final boolean usageRequested = options.containsKey("help") || args.length == 0;
  if (usageRequested) {
    System.err.println(usage(EnglishPostprocessor.class.getName()));
    System.exit(-1);
  }

  final int threadCount = PropertiesUtils.getInt(options, "nthreads", 1);
  final EnglishPreprocessor preprocessor = new EnglishPreprocessor();
  final EnglishPostprocessor postprocessor = new EnglishPostprocessor(options);

  CRFPostprocessor.setup(postprocessor, preprocessor, options);
  CRFPostprocessor.execute(threadCount, preprocessor, postprocessor);
}
 
Example 4
Source File: GermanPostprocessor.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Command-line entry point for training and evaluating the German
 * postprocessor.
 *
 * @param args command-line options; an empty argument list or a
 *             {@code help} option prints the usage message and exits
 */
public static void main(String[] args) {
  // Option names arrive here with their leading hyphens already removed.
  final Properties options = StringUtils.argsToProperties(args, optionArgDefs());
  final boolean usageRequested = options.containsKey("help") || args.length == 0;
  if (usageRequested) {
    System.err.println(usage(GermanPostprocessor.class.getName()));
    System.exit(-1);
  }

  final int threadCount = PropertiesUtils.getInt(options, "nthreads", 1);
  final GermanPreprocessor preprocessor = new GermanPreprocessor();
  final GermanPostprocessor postprocessor = new GermanPostprocessor(options);

  CRFPostprocessor.setup(postprocessor, preprocessor, options);
  CRFPostprocessor.execute(threadCount, preprocessor, postprocessor);
}
 
Example 5
Source File: Phrasal.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Command-line entry point for the Phrasal decoder.
 *
 * <p>The value stored under the empty property key is treated as the
 * configuration file; the remaining properties are passed on as options. The
 * input is read from the file named by the {@code text} option when present,
 * otherwise from standard input.
 *
 * @param args command-line arguments
 * @throws Exception if loading the decoder or decoding fails
 */
public static void main(String[] args) throws Exception {
  final Properties options = StringUtils.argsToProperties(args);

  final String configFile;
  if (options.containsKey("")) {
    configFile = (String) options.get("");
  } else {
    configFile = null;
  }
  options.remove("");

  final boolean nothingSupplied = options.size() == 0 && configFile == null;
  if (nothingSupplied || options.containsKey("help") || options.containsKey("h")) {
    System.err.println(usage());
    System.exit(-1);
  }

  // Any exception that escapes a thread is logged and terminates the process.
  Thread.setDefaultUncaughtExceptionHandler((thread, failure) -> {
    logger.fatal("Uncaught top-level exception", failure);
    System.exit(-1);
  });

  final Map<String, List<String>> configuration = getConfigurationFrom(configFile, options);
  final Phrasal decoder = Phrasal.loadDecoder(configuration);

  if (options.containsKey("text")) {
    decoder.decode(new FileInputStream(new File(options.getProperty("text"))), true);
  } else {
    decoder.decode(System.in, true);
  }
}
 
Example 6
Source File: InteractiveDriver.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Interactive console loop: annotates each line typed at the
 * {@code sentence> } prompt and prints to standard error the KBP triples
 * whose relation is one of a fixed set of relations of interest.
 *
 * @param args command-line arguments, converted to pipeline properties; the
 *             {@code annotators} and {@code regexner.mapping} properties are
 *             always overwritten
 * @throws IOException declared by the signature; presumably raised by the console helper
 */
public static void main(String[] args) throws IOException {
  final Properties pipelineProps = StringUtils.argsToProperties(args);
  pipelineProps.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  pipelineProps.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

  // Only triples with one of these relation glosses are reported.
  final Set<String> wantedRelations =
      Stream.of("per:title", "per:employee_of", "org:top_members/employees")
          .collect(Collectors.toSet());

  final StanfordCoreNLP corePipeline = new StanfordCoreNLP(pipelineProps);
  IOUtils.console("sentence> ", input -> {
    final Annotation annotation = new Annotation(input);
    corePipeline.annotate(annotation);
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(triple -> {
        if (wantedRelations.contains(triple.relationGloss())) {
          System.err.println(triple);
        }
      });
    }
  });
}
 
Example 7
Source File: KBPTest.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Debugging console for relation extraction: annotates each line typed at
 * the {@code sentence> } prompt, prints every KBP triple of each sentence to
 * standard error and the sentence itself to standard output.
 *
 * @param args command-line arguments, converted to pipeline properties; the
 *             {@code annotators} and {@code regexner.mapping} properties are
 *             always overwritten
 * @throws IOException declared by the signature; presumably raised by the console helper
 */
public static void main(String[] args) throws IOException {
  final Properties pipelineProps = StringUtils.argsToProperties(args);
  pipelineProps.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  pipelineProps.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

  final StanfordCoreNLP corePipeline = new StanfordCoreNLP(pipelineProps);
  IOUtils.console("sentence> ", input -> {
    final Annotation annotation = new Annotation(input);
    corePipeline.annotate(annotation);
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class)
          .forEach(triple -> System.err.println(triple));
      System.out.println(sentence);
    }
  });
}
 
Example 8
Source File: MakeWordClasses.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Command-line entry point: runs {@code MakeWordClasses} over the input
 * files and writes the results to standard output.
 *
 * @param args options followed by one or more whitespace-free file names;
 *             with no file name, or with {@code h}/{@code help}, the usage
 *             message is printed and the process exits
 */
public static void main(String[] args) {
  final Properties options = StringUtils.argsToProperties(args, optionArgDefs());

  // File names are stored under the empty key, separated by whitespace.
  final String[] filenames = options.getProperty("", "").split("\\s+");
  final boolean noInputFiles = filenames.length < 1 || filenames[0].length() == 0;
  final boolean helpRequested = options.containsKey("h") || options.containsKey("help");
  if (noInputFiles || helpRequested) {
    System.err.println(usage());
    System.exit(-1);
  }

  final MakeWordClasses clusterer = new MakeWordClasses(options);
  clusterer.run(filenames);
  clusterer.writeResults(System.out);
}
 
Example 9
Source File: CRFPreprocessor.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a CRF classifier from a space-separated flag string and loads the
 * model named by its {@code loadClassifier} flag.
 *
 * @param options classifier flags separated by single spaces
 * @return the classifier with its model and tag index loaded
 * @throws IllegalArgumentException if the flags do not set {@code loadClassifier}
 */
public static CRFClassifier<CoreLabel> loadClassifier(String options) throws IllegalArgumentException {
  final Properties props = StringUtils.argsToProperties(options.split(" "));
  final SeqClassifierFlags flags = new SeqClassifierFlags(props);
  final CRFClassifier<CoreLabel> classifier = new CRFClassifier<>(flags);

  final boolean modelMissing = flags.loadClassifier == null;
  if (modelMissing) {
    throw new IllegalArgumentException("missing -loadClassifier flag for CRF preprocessor.");
  }

  classifier.loadClassifierNoExceptions(flags.loadClassifier, props);
  classifier.loadTagIndex();
  return classifier;
}
 
Example 10
Source File: PhraseViewer.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Copies the command-line options into the class's static settings.
 *
 * @param args raw command-line arguments
 * @return always {@code true}; no option is rejected here
 */
private static boolean validateCommandLine(String[] args) {
  final Properties parsed = StringUtils.argsToProperties(args, argDefs());

  // Flag and file-name options; an absent file option leaves the field null.
  VERBOSE = parsed.containsKey("v");
  SRC_FILE = parsed.getProperty("s");
  OPTS_FILE = parsed.getProperty("o");
  XSD_FILE = parsed.getProperty("x");

  // The id range is unbounded on either side unless given.
  FIRST_ID = PropertiesUtils.getInt(parsed, "f", Integer.MIN_VALUE);
  LAST_ID = PropertiesUtils.getInt(parsed, "l", Integer.MAX_VALUE);

  return true;
}
 
Example 11
Source File: SerializedDependencyToCoNLL.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Loads serialized annotations into the CoreNLP cache and prints the
 * dependencies of every cached sentence.
 *
 * <p>A missing sentence produces an empty line on standard output and a note
 * on standard error. The first exception aborts the run after being reported.
 *
 * @param args command-line options ({@code annotations}, {@code changepreps})
 */
public static void main(String[] args) {
  final Properties options = StringUtils.argsToProperties(args, optionArgDefs());
  final String annotations = PropertiesUtils.get(options, "annotations", null, String.class);
  final boolean changepreps = PropertiesUtils.getBool(options, "changepreps", false);

  final int total = CoreNLPCache.loadSerialized(annotations);

  int index = 0;
  while (index < total) {
    try {
      final CoreMap current = CoreNLPCache.get(index);
      if (current != null) {
        printDependencies(current, changepreps);
      } else {
        System.out.println();
        System.err.println("Empty sentence #" + index);
      }
    } catch (Exception e) {
      System.err.println("SourceSentence #" + index);
      e.printStackTrace();
      return;
    }
    index++;
  }
}
 
Example 12
Source File: SentenceLevelEvaluation.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Scores each translation read from standard input against the references
 * at the same position and prints one score per line to standard output.
 *
 * @param args options ({@code order}, {@code no-nist}, {@code metric})
 *             followed by the reference file names; the usage message is
 *             printed and the process exits when no reference is given
 * @throws IOException if reading standard input fails
 */
public static void main(String[] args) throws IOException {
  if (args.length < 1) {
    System.err.print(usage());
    System.exit(-1);
  }

  Properties options = StringUtils.argsToProperties(args, argDefs());
  int ngramOrder = PropertiesUtils.getInt(options, "order", BLEUMetric.DEFAULT_MAX_NGRAM_ORDER);
  boolean disableTokenization = PropertiesUtils.getBool(options, "no-nist", false);
  String metric = options.getProperty("metric", "bleu");

  // Positional arguments live under the empty key. The key is absent when
  // only options were supplied, which used to end in a NullPointerException.
  String refList = options.getProperty("");
  if (refList == null || refList.trim().isEmpty()) {
    System.err.print(usage());
    System.exit(-1);
  }
  String[] refs = refList.trim().split("\\s+");
  List<List<Sequence<IString>>> referencesList = MetricUtils.readReferences(refs, ! disableTokenization);
  System.err.printf("Metric: %s with %d references%n", metric, referencesList.get(0).size());

  int sourceInputId = 0;
  // try-with-resources closes the reader even if scoring fails part-way.
  try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(System.in))) {
    for (String line; (line = reader.readLine()) != null; ++sourceInputId) {
      line = disableTokenization ? line : NISTTokenizer.tokenize(line);
      Sequence<IString> translation = IStrings.tokenize(line);
      // Input line i is scored against reference entry i; more input lines
      // than reference entries makes get(...) fail with an index error.
      double score = getScore(translation, referencesList.get(sourceInputId), ngramOrder, metric);
      System.out.printf("%.4f%n", score);
    }
  }
  System.err.printf("Scored %d input segments%n", sourceInputId);
}
 
Example 13
Source File: BLEUMetric.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Computes BLEU for the translations read from standard input and prints
 * the score, the per-order precisions, the brevity penalty and the raw
 * precision counts to standard output.
 *
 * @param args options ({@code order}, {@code smooth}, {@code no-nist},
 *             {@code cased}) followed by the reference file names; the usage
 *             message is printed and the process exits when no reference is
 *             given
 * @throws IOException if reading standard input fails
 */
public static void main(String[] args) throws IOException {
  if (args.length < 1) {
    System.err.print(usage());
    System.exit(-1);
  }

  Properties options = StringUtils.argsToProperties(args, argDefs());
  int BLEUOrder = PropertiesUtils.getInt(options, "order", BLEUMetric.DEFAULT_MAX_NGRAM_ORDER);
  boolean doSmooth = PropertiesUtils.getBool(options, "smooth", false);
  boolean disableTokenization = PropertiesUtils.getBool(options, "no-nist", false);
  boolean doCased = PropertiesUtils.getBool(options, "cased", false);

  // Setup the metric tokenization scheme. Applies to both the references and
  // hypotheses
  if (doCased) {
    NISTTokenizer.lowercase(false);
  }
  NISTTokenizer.normalize( ! disableTokenization);

  // Load the references. Positional arguments live under the empty key,
  // which is absent when only options were supplied; without this guard the
  // split below would throw a NullPointerException.
  String refList = options.getProperty("");
  if (refList == null || refList.trim().isEmpty()) {
    System.err.print(usage());
    System.exit(-1);
  }
  String[] refs = refList.trim().split("\\s+");
  System.out.printf("Metric: BLEU-%d with %d references%n", BLEUOrder, refs.length);
  List<List<Sequence<IString>>> referencesList = MetricUtils.readReferences(refs, true);

  // For backwards compatibility
  doSmooth |= System.getProperty("smoothBLEU") != null;

  BLEUMetric<IString, String> bleu = new BLEUMetric<IString, String>(referencesList, BLEUOrder,
        doSmooth);
  BLEUMetric<IString, String>.BLEUIncrementalMetric incMetric = bleu
      .getIncrementalMetric();

  // try-with-resources closes the reader even if tokenization or scoring fails.
  try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(System.in))) {
    for (String line; (line = reader.readLine()) != null; ) {
      line = NISTTokenizer.tokenize(line);
      Sequence<IString> translation = IStrings.tokenize(line);
      ScoredFeaturizedTranslation<IString, String> tran = new ScoredFeaturizedTranslation<IString, String>(
          translation, null, 0);
      incMetric.add(tran);
    }
    // Check for an incomplete set of translations
    if (reader.getLineNumber() < referencesList.size()) {
      System.err.printf("WARNING: Translation candidate file is shorter than references (%d/%d)%n",
          reader.getLineNumber(), referencesList.size());
    }
  }

  double[] ngramPrecisions = incMetric.ngramPrecisions();
  System.out.printf("BLEU = %.3f, ", 100 * incMetric.score());
  for (int i = 0; i < ngramPrecisions.length; i++) {
    if (i != 0) {
      System.out.print("/");
    }
    System.out.printf("%.3f", ngramPrecisions[i] * 100);
  }
  System.out.printf(" (BP=%.3f, ratio=%.3f %d/%d)%n", incMetric
      .brevityPenalty(), ((1.0 * incMetric.candidateLength()) / incMetric
      .effectiveReferenceLength()), incMetric.candidateLength(), incMetric
      .effectiveReferenceLength());

  System.out.printf("%nPrecision Details:%n");
  double[][] precCounts = incMetric.ngramPrecisionCounts();
  for (int i = 0; i < ngramPrecisions.length; i++) {
    System.out.printf("\t%d:%d/%d%n", i, (int) precCounts[i][0], (int) precCounts[i][1]);
  }
}
 
Example 14
Source File: DynamicTMBuilder.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds a dynamic translation model from a parallel corpus and its word
 * alignments, then serializes it.
 *
 * <p>Positional arguments, in order: source file, target file, f2e alignment
 * file and, optionally, e2f alignment file. Option {@code o} names the
 * output file and option {@code s} the symmetrization type, which is only
 * used when the e2f alignment file is given.
 *
 * @param args command-line arguments; the usage message is printed and the
 *             process exits when fewer than three positional arguments are
 *             supplied or the first argument is {@code -h}/{@code -help}
 */
public static void main(String[] args) {
  if (args.length < 1 || args[0].equals("-h") || args[0].equals("-help")) {
    System.err.print(usage());
    System.exit(-1);
  }
  Properties options = StringUtils.argsToProperties(args, optionDefs());
  // Default to "" so that an invocation with options only (no positional
  // arguments) reaches the usage message instead of a NullPointerException.
  String[] positionalArgs = options.getProperty("", "").split("\\s+");
  if (positionalArgs.length < 3) {
    System.err.print(usage());
    System.exit(-1);
  }

  String outputFileName = options.getProperty("o", "tm" + IOTools.BIN_EXTENSION);
  SymmetrizationType type = options.containsKey("s") ? SymmetrizationType.valueOf(options.getProperty("s"))
      : SymmetrizationType.valueOf("grow_diag_final_and");

  String sourceFile = positionalArgs[0];
  String targetFile = positionalArgs[1];
  String alignFEfile = positionalArgs[2];
  // The reverse alignment is picked up only when exactly four positional
  // arguments are present.
  String alignEFfile = positionalArgs.length == 4 ? positionalArgs[3] : null;

  logger.info("Source file: {}", sourceFile);
  logger.info("Target file: {}", targetFile);
  logger.info("Alignment file (f2e): {}", alignFEfile);
  if (alignEFfile != null) {
    logger.info("Alignment file (e2f): {}", alignEFfile);
  }

  try {
    TimeKeeper timer = TimingUtils.start();
    // Load corpus
    DynamicTMBuilder tmBuilder = alignEFfile == null ? new DynamicTMBuilder(sourceFile, targetFile, alignFEfile) :
      new DynamicTMBuilder(sourceFile, targetFile, alignFEfile, alignEFfile, type);
    timer.mark("Corpus Loading");

    // Build TM
    DynamicTranslationModel<String> tm = tmBuilder.build();
    timer.mark("Model construction");

    // Serialize
    logger.info("Serializing to: {}", outputFileName);
    IOTools.serialize(outputFileName, tm);
    timer.mark("Serialization");

    logger.info("Timing summary: {}", timer);
    logger.info("Success! Shutting down...");
  } catch (Exception e) {
    // Failures are logged only; the method then returns normally.
    logger.fatal("Translation model build error!", e);
  }
}
 
Example 15
Source File: MakePTMPhrasalInput.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
   * Prints a tab-separated table of tokenized MT output, tokenized user text
   * and source-to-target alignments for PTM data.
   *
   * <p>NOTE(review): the CSV reader that fed the main loop is commented out,
   * so {@code fields} is never assigned. As written, the loop skips its first
   * iteration as the "header" and then dereferences the null {@code fields}
   * array on the second one, which throws a NullPointerException. The method
   * therefore only prints the header row before failing.
   *
   * @param args exactly three arguments are required, otherwise the usage
   *             message is printed and the process exits; the positional
   *             values are read as source language, target language and SQL
   *             file
   * @throws IOException declared by the signature; no visible statement in
   *                     the current body obviously raises it
   */
  public static void main(String[] args) throws IOException {
    if (args.length != 3) {
      System.err.print(usage());
      System.exit(-1);
    }
    Properties options = StringUtils.argsToProperties(args, argDefs());
    // Positional arguments are stored under the empty key, whitespace-separated.
    // NOTE(review): getProperty("") returns null if the three arguments were
    // all consumed as options — confirm that cannot happen with argDefs().
    String[] positionalArgs = options.getProperty("").split("\\s+");

    String srcLang = positionalArgs[0];
    String tgtLang = positionalArgs[1];
    // Only referenced by the commented-out CSVReader below.
    String sqlFile = positionalArgs[2];
    
    Preprocessor srcPreproc = ProcessorFactory.getPreprocessor(srcLang);
    Preprocessor tgtPreproc = ProcessorFactory.getPreprocessor(tgtLang);
    
    // Header row of the tab-separated output.
    System.out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s%n", "doc_id", "seg_id", "username", "mt_tok", "user_tok", "s2mt_tok", "src_tok");
//    CSVReader reader = new CSVReader(new FileReader(sqlFile));
    // Skip header
    boolean seenHeader = false;
//    for (String[] fields; (fields = reader.readNext()) != null;) {
  // Stand-in for the disabled reader loop above: fields stays null and the
  // loop has no exit condition.
  for (String[] fields = null;;) {
      if ( ! seenHeader) {
        seenHeader = true;
        continue;
      }
//      String segId = String.format("%s:%s", fields[0], fields[1]).replace(".src.json", ".tgt");
      // Columns used: 3 = target line, 5 = alignment string, 6 = source line.
      String tgtLine = fields[3].trim();
      String alignStr = extend(fields[5]).trim();
      String srcLine = fields[6].trim();
      // Alignment between the raw source and raw target lines.
      SymmetricalWordAlignment s2t = new SymmetricalWordAlignment(srcLine, tgtLine, alignStr);
      // Alignments between each raw line and its preprocessed form.
      SymmetricalWordAlignment s2sPrime = srcPreproc.processAndAlign(srcLine);
      SymmetricalWordAlignment t2tPrime = tgtPreproc.processAndAlign(tgtLine);
      String userTextTok = tgtPreproc.process(fields[3]).toString();
      
      // Want sprime --> tprime
      // Compose the three alignments: processed source index i -> raw source
      // j -> raw target k -> processed target q, emitting one "i-q" per path.
      List<String> alignmentList = new LinkedList<>();
      for (int i = 0, size = s2sPrime.eSize(); i < size; ++i) {
        Set<Integer> alignments = s2sPrime.e2f(i);
        for (int j : alignments) {
          Set<Integer> alignments2 = s2t.f2e(j);
          for (int k : alignments2) {
            Set<Integer> alignments3 = t2tPrime.f2e(k);
            for (int q : alignments3) {
              alignmentList.add(String.format("%d-%d",i,q));
            }
          }
        }
      }
      // One output row per input record, in the same column order as the header.
      System.out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s%n", fields[0], fields[1], fields[2], t2tPrime.e().toString(), userTextTok, Sentence.listToString(alignmentList), s2sPrime.e().toString());
    }
//    reader.close();
  }
 
Example 16
Source File: BLEUGenreEvaluator.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Computes BLEU per genre for the translations read from standard input and
 * prints, for each genre, the score, the per-order precisions, the brevity
 * penalty and the raw precision counts.
 *
 * <p>The first positional argument is the input-properties file; the
 * remaining ones are the reference files.
 *
 * @param args options ({@code order}, {@code cased}) followed by the
 *             positional arguments; the usage message is printed and the
 *             process exits when fewer than two arguments are given or no
 *             positional argument is present
 * @throws IOException declared by the signature; presumably raised while
 *                     reading the input files or standard input
 */
public static void main(String[] args) throws IOException {
  if (args.length < 2) {
    System.err.print(usage());
    System.exit(-1);
  }

  Properties options = StringUtils.argsToProperties(args, argDefs());
  int BLEUOrder = PropertiesUtils.getInt(options, "order", BLEUMetric.DEFAULT_MAX_NGRAM_ORDER);
  boolean doCased = PropertiesUtils.getBool(options, "cased", false);

  // Setup the metric tokenization scheme. Applies to both the references and
  // hypotheses
  if (doCased) {
    NISTTokenizer.lowercase(false);
  }

  // Load the references. Positional arguments live under the empty key,
  // which is absent when only options were supplied; without this guard the
  // split below would throw a NullPointerException.
  String positional = options.getProperty("");
  if (positional == null || positional.trim().isEmpty()) {
    System.err.print(usage());
    System.exit(-1);
  }
  String[] parameters = positional.trim().split("\\s+");
  String[] refs = new String[parameters.length - 1];
  System.arraycopy(parameters, 1, refs, 0, refs.length);
  List<InputProperties> inputProperties = InputProperties.parse(new File(parameters[0]));
  List<List<Sequence<IString>>> referencesList = MetricUtils.readReferences(refs, true);

  Map<String,BLEUMetric<IString, String>.BLEUIncrementalMetric> metrics =
      BLEUGenreEvaluator.run(referencesList, inputProperties, BLEUOrder, System.in);

  for (Map.Entry<String,BLEUMetric<IString, String>.BLEUIncrementalMetric> entry : metrics.entrySet()) {
    String genre = entry.getKey();
    BLEUMetric<IString, String>.BLEUIncrementalMetric incMetric = entry.getValue();
    System.out.printf("Genre: %s%n", genre);
    double[] ngramPrecisions = incMetric.ngramPrecisions();
    System.out.printf("BLEU = %.3f, ", 100 * incMetric.score());
    for (int i = 0; i < ngramPrecisions.length; i++) {
      if (i != 0) {
        System.out.print("/");
      }
      System.out.printf("%.3f", ngramPrecisions[i] * 100);
    }
    System.out.printf(" (BP=%.3f, ratio=%.3f %d/%d)%n", incMetric
        .brevityPenalty(), ((1.0 * incMetric.candidateLength()) / incMetric
            .effectiveReferenceLength()), incMetric.candidateLength(), incMetric
            .effectiveReferenceLength());

    System.out.printf("%nPrecision Details:%n");
    double[][] precCounts = incMetric.ngramPrecisionCounts();
    for (int i = 0; i < ngramPrecisions.length; i++) {
      System.out.printf("\t%d:%d/%d%n", i, (int) precCounts[i][0], (int) precCounts[i][1]);
    }
    System.out.println();
  }
}
 
Example 17
Source File: BuildDependencyLMData.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Reads a tab-separated dependency file and writes dependency language-model
 * data to {@code left.deplm}, {@code right.deplm} and {@code head.deplm} in
 * the output directory.
 *
 * <p>Sentences are separated by empty lines. For each token line, column 0
 * is the 1-based token id, column 1 the token, column 6 the 1-based head id
 * and column 7 the relation (a {@code frag} relation maps the head to -2).
 *
 * @param args options: {@code input} (required, the dependency file) and
 *             {@code outdir} (output directory, defaults to the current
 *             directory)
 * @throws IOException if an output file cannot be opened or reading or
 *                     writing fails
 */
public static void main(String[] args) throws IOException {
  Properties options = StringUtils.argsToProperties(args, optionArgDefs());
  String sourceTokens = PropertiesUtils.get(options, "input", null, String.class);
  String outdirPath = PropertiesUtils.get(options, "outdir", ".", String.class);

  // Fail with a message instead of a NullPointerException from new File(null),
  // and before any output file has been created.
  if (sourceTokens == null) {
    System.err.println("Missing required option: -input file [-outdir directory]");
    return;
  }

  String leftDepLMFilename = outdirPath + File.separator + "left.deplm";
  String rightDepLMFilename = outdirPath + File.separator + "right.deplm";
  String headDepLMFilename = outdirPath + File.separator + "head.deplm";

  try {
    // FileWriter creates the file when it does not exist, so no separate
    // exists()/createNewFile() step is needed.
    leftDepLMWriter = new BufferedWriter(new FileWriter(new File(leftDepLMFilename).getAbsoluteFile()));
    rightDepLMWriter = new BufferedWriter(new FileWriter(new File(rightDepLMFilename).getAbsoluteFile()));
    headDepLMWriter = new BufferedWriter(new FileWriter(new File(headDepLMFilename).getAbsoluteFile()));

    try (BufferedReader sourceReader = new BufferedReader(new FileReader(new File(sourceTokens)))) {
      Map<Integer, NavigableSet<Integer>> dependencies = new HashMap<>();
      List<String> tokens = new LinkedList<>();
      while (true) {
        String sourceSentence = sourceReader.readLine();
        // An empty line or end of input closes the current sentence.
        if (sourceSentence == null || sourceSentence.equals("")) {
          printLeftAndRightDependencies(dependencies, new ArraySequence<String>(tokens));
          dependencies = new HashMap<>();
          tokens = new LinkedList<>();
          if (sourceSentence == null) {
            break;
          } else {
            continue;
          }
        }

        String[] fields = sourceSentence.split("\t");
        int id = Integer.parseInt(fields[0]) - 1;
        int head = fields[7].equals("frag") ? -2 : Integer.parseInt(fields[6]) - 1;

        String token = fields[1];
        tokens.add(token);
        // Non-word tokens stay in the token list but get no dependency entry.
        if (!isWord(token)) {
          continue;
        }
        if (!dependencies.containsKey(head)) {
          dependencies.put(head, new TreeSet<Integer>());
        }
        if (!dependencies.containsKey(id)) {
          dependencies.put(id, new TreeSet<Integer>());
        }
        dependencies.get(head).add(id);
      }
    }
  } finally {
    // Close every writer that was opened, even if reading or writing failed
    // or an earlier close throws.
    try {
      if (leftDepLMWriter != null) {
        leftDepLMWriter.close();
      }
    } finally {
      try {
        if (rightDepLMWriter != null) {
          rightDepLMWriter.close();
        }
      } finally {
        if (headDepLMWriter != null) {
          headDepLMWriter.close();
        }
      }
    }
  }
}
 
Example 18
Source File: DependencyLanguageModelScoreNBest.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Rescores an n-best list with the dependency language model and prints the
 * list to standard output with the {@code DEPLM}, {@code DEPLMWORDPENALTY}
 * and {@code DEPLMPERP} features appended to each entry's feature field.
 *
 * <p>The source file, the dependency file and the n-best list are read in
 * parallel: for each source sentence the next dependency structure is read
 * and applied to all consecutive n-best entries sharing the current id.
 *
 * @param args options; {@code sourceTokens}, {@code nBestList},
 *             {@code dependencies} and {@code lm} are required, while
 *             {@code classMap}, {@code headClasses}, {@code scoreFrag},
 *             {@code scoreStop} and {@code transitive} are optional
 * @throws IOException if reading one of the input files fails
 */
public static void main(String[] args) throws IOException {

  Properties options = StringUtils.argsToProperties(args, optionArgDefs());
  String sourceTokens = PropertiesUtils.get(options, "sourceTokens", null, String.class);
  String nBestList = PropertiesUtils.get(options, "nBestList", null, String.class);
  String dependencies = PropertiesUtils.get(options, "dependencies", null, String.class);
  String lm = PropertiesUtils.get(options, "lm", null, String.class);
  String classMapFilename = PropertiesUtils.get(options, "classMap", null, String.class);
  DependencyLanguageModelPerplexity2.useHeadClasses = PropertiesUtils.getBool(options, "headClasses", false);

  boolean scoreFrag = PropertiesUtils.getBool(options, "scoreFrag", false);
  boolean scoreStop = PropertiesUtils.getBool(options, "scoreStop", false);
  boolean transitive = PropertiesUtils.getBool(options, "transitive", false);

  if (sourceTokens == null || nBestList == null || dependencies == null || lm == null) {
    System.err.println("java " + DependencyLanguageModelScoreNBest.class.getCanonicalName() + " -sourceTokens file -nBestList file -dependencies file -lm file [-classMap file]");
    return;
  }

  if (classMapFilename != null) {
    System.err.println("Loading word class mapping from " + classMapFilename);
    classMap = new LocalWordClassMap();
    classMap.load(classMapFilename);
  } else {
    classMap = null;
  }

  DEPLM = LanguageModelFactory.load(lm);

  String separatorExpr = " \\|\\|\\| ";
  String separator = " ||| ";
  DecimalFormat df = new DecimalFormat("0.####E0");

  // try-with-resources closes all three readers on every exit path.
  try (LineNumberReader sourceReader = IOTools.getReaderFromFile(sourceTokens);
       LineNumberReader nBestListReader = IOTools.getReaderFromFile(nBestList);
       LineNumberReader dependenciesReader = IOTools.getReaderFromFile(dependencies)) {

    String nBestLine = nBestListReader.readLine();
    // An empty n-best file yields null here; use the same "" fallback as at
    // the end of the loop instead of failing with a NullPointerException.
    String currentId = nBestLine != null ? nBestLine.split(separatorExpr)[0] : "";

    String sourceSentence;
    while ((sourceSentence = sourceReader.readLine()) != null) {
      HashMap<Integer, Pair<IndexedWord, List<Integer>>> head2Dependents = DependencyUtils.getDependenciesFromCoNLLFileReader(dependenciesReader, true, true);
      Map<Integer, Integer> dependent2Head = DependencyUtils.getReverseDependencies(head2Dependents);

      // Consume every n-best entry that belongs to the current sentence id.
      while (nBestLine != null && nBestLine.split(separatorExpr)[0].equals(currentId)) {
        String[] nBestParts = nBestLine.split(separatorExpr);
        String translation = nBestParts[1];
        String alignmentString = nBestParts[4];

        SymmetricalWordAlignment alignment = new SymmetricalWordAlignment(sourceSentence, translation, alignmentString);

        Map<Integer, NavigableSet<Integer>> projectedDependencies = DependencyProjectorCoNLL.projectDependencies(dependent2Head, alignment, transitive);

        Pair<Double, Integer> treeScore = scoreTree(projectedDependencies, alignment.e(), scoreFrag, scoreStop);

        double score = treeScore.first;
        int deplmWordCount = treeScore.second;

        // Re-emit the entry with the three DEPLM features appended to field 2.
        System.out.print(nBestParts[0]);
        System.out.print(separator);
        System.out.print(nBestParts[1]);
        System.out.print(separator);
        System.out.print(nBestParts[2]);
        System.out.print(" DEPLM: ");
        System.out.print(df.format(score));
        System.out.print(" DEPLMWORDPENALTY: ");
        System.out.print(-deplmWordCount);
        System.out.print(" DEPLMPERP: ");
        // Prints a plain 0 when no word was scored, avoiding a division by zero.
        System.out.print(deplmWordCount > 0 ? df.format(score / deplmWordCount) : 0);
        System.out.print(separator);
        System.out.print(nBestParts[3]);
        System.out.print(separator);
        System.out.print(nBestParts[4]);
        System.out.println("");

        nBestLine = nBestListReader.readLine();
      }
      currentId = nBestLine != null ? nBestLine.split(separatorExpr)[0] : "";
    }
  }

}
 
Example 19
Source File: PhrasalService.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
   * Launches the Phrasal HTTP service: a Jetty server that hosts the Phrasal
   * servlet together with a static resource handler for the debugging page.
   *
   * @param args service options ({@code p} port, {@code m} mock servlet,
   *             {@code l} bind to the debug host, {@code u} welcome file,
   *             {@code r} resource path) followed by exactly one positional
   *             argument, the Phrasal ini file; otherwise the usage message
   *             is printed and the process exits
   */
  public static void main(String[] args) {
    final Properties options = StringUtils.argsToProperties(args, optionArgDefs());
    final int port = PropertiesUtils.getInt(options, "p", DEFAULT_HTTP_PORT);
    final boolean useMockServlet = PropertiesUtils.getBool(options, "m", false);
    final boolean bindToDebugHost = PropertiesUtils.getBool(options, "l", false);
    final String welcomeFile = options.getProperty("u", "debug.html");
    final String resourceBase = options.getProperty("r", ".");

    // Exactly one positional argument (the ini file) must be present.
    final String positional = options.getProperty("");
    final String[] positionalArgs = positional != null ? positional.split("\\s+") : null;
    if (positionalArgs == null || positionalArgs.length != 1) {
      System.out.println(usage());
      System.exit(-1);
    }
    final String iniFile = positionalArgs[0];

    // Jetty server with a single connector on the requested port.
    final Server server = new Server();

    // Jetty 8 configured the connector like this:
//    Connector connector = new SelectChannelConnector();
//    connector.setPort(port);
//    server.addConnector(connector);

    // Jetty 9 equivalent
    final ServerConnector connector = new ServerConnector(server);
    connector.setPort(port);
    server.addConnector(connector);
    if (bindToDebugHost) {
      connector.setHost(DEBUG_URL);
    }

    // Servlet context rooted at "/" with session support.
    final ServletContextHandler context = new ServletContextHandler(ServletContextHandler.SESSIONS);
    context.setContextPath("/");

    // The mock servlet needs no ini file; the real one is built from it.
    final PhrasalServlet servlet;
    if (useMockServlet) {
      servlet = new PhrasalServlet();
    } else {
      servlet = new PhrasalServlet(iniFile);
    }
    context.addServlet(new ServletHolder(servlet), SERVLET_ROOT);

    // TODO(spenceg): gzip compression causes an encoding problem for unicode characters
    // on the client. Not sure if the compression or decompression is the problem.
//    EnumSet<DispatcherType> dispatches = EnumSet.of(DispatcherType.REQUEST, DispatcherType.ASYNC);
//    context.addFilter(new FilterHolder(new IncludableGzipFilter()), "/t", dispatches);

    // Static handler serving the debugging page.
    final ResourceHandler staticFiles = new ResourceHandler();
    staticFiles.setWelcomeFiles(new String[]{ welcomeFile });
    staticFiles.setResourceBase(resourceBase);

    // Static resources are consulted before the servlet context.
    final HandlerList handlerChain = new HandlerList();
    handlerChain.setHandlers(new Handler[] { staticFiles, context });
    server.setHandler(handlerChain);

    // Start the server and block until it stops.
    try {
      logger.info("Starting PhrasalService on port: " + port);
      server.start();
      server.join();
    } catch (Exception e) {
      logger.error("Servlet crashed. Service shutting down.");
      e.printStackTrace();
    }
  }
 
Example 20
Source File: Evaluate.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Evaluates the translations read from standard input with a corpus-level
 * metric and prints the score and its details to standard output.
 *
 * <p>The first positional argument names the metric; the remaining ones are
 * the reference files.
 *
 * @param args options ({@code no-nist}, {@code cased}) followed by the
 *             positional arguments; the usage message is printed and the
 *             process exits when fewer than two arguments are given or no
 *             positional argument is present
 * @throws IOException if reading standard input fails
 */
public static void main(String[] args) throws IOException {
  if (args.length < 2) {
    System.err.print(usage());
    System.exit(-1);
  }

  Properties options = StringUtils.argsToProperties(args, argDefs());
  boolean disableTokenization = PropertiesUtils.getBool(options, "no-nist", false);
  boolean doCased = PropertiesUtils.getBool(options, "cased", false);

  // Setup the metric tokenization scheme. Applies to both the references and
  // hypotheses
  if (doCased) {
    NISTTokenizer.lowercase(false);
  }
  NISTTokenizer.normalize( ! disableTokenization);

  // Load the references. Positional arguments live under the empty key,
  // which is absent when only options were supplied; without this guard the
  // split below would throw a NullPointerException.
  String positional = options.getProperty("");
  if (positional == null || positional.trim().isEmpty()) {
    System.err.print(usage());
    System.exit(-1);
  }
  String[] parsedArgs = positional.trim().split("\\s+");
  final String evalMetric = parsedArgs[0];
  String[] refs= Arrays.copyOfRange(parsedArgs, 1, parsedArgs.length);
  final List<List<Sequence<IString>>> references = MetricUtils.readReferences(refs, true);
  System.out.printf("Metric: %s with %d references%n", evalMetric, refs.length);

  EvaluationMetric<IString,String> metric = CorpusLevelMetricFactory.newMetric(evalMetric, references);
  IncrementalEvaluationMetric<IString,String> incMetric = metric.getIncrementalMetric();

  // try-with-resources closes the reader even if tokenization or scoring fails.
  try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(System.in))) {
    for (String line; (line = reader.readLine()) != null; ) {
      line = NISTTokenizer.tokenize(line);
      Sequence<IString> translation = IStrings.tokenize(line);
      ScoredFeaturizedTranslation<IString, String> tran = new ScoredFeaturizedTranslation<>(
          translation, null, 0);
      incMetric.add(tran);
    }
    // Check for an incomplete set of translations
    if (reader.getLineNumber() < references.size()) {
      System.err.printf("WARNING: Translation candidate file is shorter than references (%d/%d)%n",
          reader.getLineNumber(), references.size());
    }
  }

  System.out.printf("%s = %.3f%n", evalMetric, 100 * Math.abs(incMetric.score()));
  System.out.printf("Details:%n%s%n", incMetric.scoreDetails());
}