Java Code Examples for edu.stanford.nlp.util.Pair

The following examples show how to use edu.stanford.nlp.util.Pair. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Rule-based relation classification.  The first relation whose entity-type
 * constraints fit the input and whose Semgrex rules match one of the
 * sentence's dependency graphs wins, with confidence 1.0.
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
    for (RelationType rel : RelationType.values()) {
        // Skip relations without rules, or whose subject/object entity types
        // are incompatible with this input.
        if (!rules.containsKey(rel)
                || rel.entityType != input.subjectType
                || !rel.validNamedEntityLabels.contains(input.objectType)) {
            continue;
        }
        Collection<SemgrexPattern> rulesForRel = rules.get(rel);
        CoreMap sentence = input.sentence.asCoreMap(Sentence::nerTags, Sentence::dependencyGraph);
        // Try the enhanced++ dependencies first, then the alternative graph;
        // the || short-circuits on the first hit.
        if (matches(sentence, rulesForRel, input,
                sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class))
                || matches(sentence, rulesForRel, input,
                sentence.get(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class))) {
            return Pair.makePair(rel.canonicalName, 1.0);
        }
    }

    // No rule fired for any relation.
    return Pair.makePair(NO_RELATION, 1.0);
}
 
Example 2
/**
 * Dispatches classification to the strategy-specific implementation selected
 * by the {@code ensembleStrategy} field.
 *
 * @param input the (subject, object, sentence) triple to classify
 * @return the chosen strategy's (relation, confidence) prediction
 * @throws UnsupportedOperationException if the strategy has no handler
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
    switch (ensembleStrategy) {
        case DEFAULT:
            return classifyDefault(input);
        case HIGHEST_SCORE:
            return classifyWithHighestScore(input);
        case VOTE:
            return classifyWithVote(input);
        case WEIGHTED_VOTE:
            return classifyWithWeightedVote(input);
        case HIGH_RECALL:
            return classifyWithHighRecall(input);
        case HIGH_PRECISION:
            return classifyWithHighPrecision(input);
        default:
            // Fix: the original threw UnsupportedClassVersionError, which
            // signals a JVM classfile-version problem; an unhandled enum
            // constant is an unsupported operation instead.
            throw new UnsupportedOperationException(ensembleStrategy + " not supported");
    }
}
 
Example 3
/**
 * Rule-based relation classification.  For each candidate relation whose
 * entity-type constraints fit the input, runs that relation's Semgrex rules
 * against the sentence's dependency graphs and returns on the first match.
 *
 * @param input the (subject, object, sentence) triple to classify
 * @return a pair of (relation canonical name, 1.0) on a rule match, or
 *         (NO_RELATION, 1.0) when no rule fires
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
  for (RelationType rel : RelationType.values()) {

    // Only consider relations that have rules and whose subject entity type
    // and object NE label are compatible with this input.
    if (rules.containsKey(rel) &&
        rel.entityType == input.subjectType &&
        rel.validNamedEntityLabels.contains(input.objectType)) {
      Collection<SemgrexPattern> rulesForRel = rules.get(rel);
      CoreMap sentence = input.sentence.asCoreMap(Sentence::nerTags, Sentence::dependencyGraph);
      // Check the enhanced++ dependencies first, then the alternative
      // dependency graph; || short-circuits on the first match.
      boolean matches
          = matches(sentence, rulesForRel, input,
          sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class)) ||
          matches(sentence, rulesForRel, input,
              sentence.get(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class));
      if (matches) {
        //logger.log("MATCH for " + rel +  ". " + sentence: + sentence + " with rules for  " + rel);
        return Pair.makePair(rel.canonicalName, 1.0);
      }
    }
  }

  // No relation's rules matched: report NO_RELATION with full confidence.
  return Pair.makePair(NO_RELATION, 1.0);
}
 
Example 4
/**
 * Loads per-sentence dependency structures from a CoNLL-format file and
 * rebuilds the forward/reverse dependency caches (the reachable-nodes cache
 * is reset to empty).  Sentences are keyed by their 0-based position in the
 * file.
 *
 * @param filename path of the CoNLL dependency file
 * @throws IOException if the file cannot be read
 */
public void loadDependencies(String filename) throws IOException {
  forwardDependenciesCache = new HashMap<Integer, Map<Integer, HashSet<Integer>>>();
  reverseDependenciesCache = new HashMap<Integer, Map<Integer, Integer>>();
  reachableNodesCache = new HashMap<Integer, Map<Integer, Set<Integer>>>();

  // Fix: the original never closed the reader if parsing threw mid-file;
  // try-with-resources guarantees cleanup on every exit path.
  try (LineNumberReader reader = IOTools.getReaderFromFile(filename)) {
    HashMap<Integer, Pair<IndexedWord, List<Integer>>> deps;
    int i = 0;
    while ((deps = DependencyUtils.getDependenciesFromCoNLLFileReader(reader, true, true)) != null) {
      reverseDependenciesCache.put(i, DependencyUtils.getReverseDependencies(deps));
      // Forward map: governor index -> set of its child indices.
      Map<Integer, HashSet<Integer>> forwardDeps = new HashMap<Integer, HashSet<Integer>>();
      for (Integer gov : deps.keySet()) {
        forwardDeps.put(gov, new HashSet<Integer>(deps.get(gov).second));
      }
      forwardDependenciesCache.put(i, forwardDeps);
      i++;
    }
  }
}
 
Example 5
Source Project: phrasal   Source File: TranslationLayout.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Adds a full-width translation row to the layout, rendering the text as a
 * white-on-bgColor label, and records it under the given name.  Always
 * returns true.
 */
public boolean addTranslationRow(String name, String trans, Color bgColor) {
  JLabel label = new JLabel(trans);
  label.setOpaque(true);
  label.setBackground(bgColor);
  label.setForeground(Color.WHITE);

  // One cell spanning every column of the grid, stretched horizontally.
  GridBagConstraints constraints = new GridBagConstraints();
  constraints.fill = GridBagConstraints.HORIZONTAL;
  constraints.gridx = 0;
  constraints.ipady = 20;
  constraints.gridwidth = numColumns;

  // Prefer re-filling a previously vacated row; otherwise append a new row
  // below the existing content.
  if (unusedRows.isEmpty()) {
    ++numFullTranslationRows;
    constraints.gridy = numRows + numFullTranslationRows;
  } else {
    constraints.gridy = unusedRows.removeFirst();
  }

  if (panel != null) {
    panel.add(label, constraints);
  }
  fullTranslations.put(name, new Pair<Integer, JLabel>(constraints.gridy, label));

  return true;
}
 
Example 6
Source Project: uncc2014watsonsim   Source File: Phrase.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Builds a map from the head-token index of each non-representative coref
 * mention in the phrase to a (mention, representative mention) pair, so a
 * pronoun can be resolved to the entity it refers to.
 *
 * @param p the phrase whose (memoized) CoreNLP coref chains are read
 * @return map keyed by the mention's head index; empty if no chains exist
 */
private static Map<Integer, Pair<CorefMention, CorefMention>> _unpronoun(Phrase p) {
	Stream<Pair<CorefMention, CorefMention>> s =
			Stream.of(p.memo(Phrase.coreNLP).get(CorefChainAnnotation.class))
		.filter(Objects::nonNull)  // Do nothing with an empty map
		.flatMap(chains -> chains.entrySet().stream()) // Disassemble the map
	    .flatMap(entry -> {
			// Link each non-representative entry to its main mention
			CorefMention main = entry.getValue().getRepresentativeMention();
			return entry.getValue().getMentionsInTextualOrder().stream()
				.filter(mention -> mention != main)
				.map(mention -> makePair(mention, main));
		});
	// Type inference chokes here so write it down then return.
	// Combiner is a no-op: only correct for sequential collection -- this
	// stream is sequential, so that holds. NOTE(review): would break if
	// .parallel() were ever added upstream.
	return s.collect(HashMap::new,
			(m, pair) -> m.put(pair.first.headIndex, pair),
			(l, r) -> {});
}
 
Example 7
Source Project: ADW   Source File: TextualSimilarity.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Converts a space-delimited sentence into a Stanford-processed term list:
 * the sentence is lemmatized and POS-tagged, then terms are extracted
 * (multiword WordNet expressions allowed, POS appended).  Stopwords are
 * discarded when the {@code discardStopwords} field is set.
 *
 * @param sentence
 * 			input sentence, space delimited
 * @return
 * 		a pair containing <list of word-pos terms, remaining not-handled terms>
 */
public Pair<List<String>, List<String>> getStanfordSentence(String sentence)
{
	List<WordLemmaTag> wlts = SentenceProcessor.getInstance().processSentence(sentence, false);
	
	List<String> terms = null;
	StanfordSentence sSentence = StanfordSentence.fromLine(Strings.join(wlts," "));
	
	try
	{
		 terms = sSentence.getTerms(TAGS, 
				 Language.EN, 
				 null, 
				 MultiwordBelongingTo.WORDNET, 
				 CompoundingParameter.ALLOW_MULTIWORD_EXPRESSIONS,
				 CompoundingParameter.APPEND_POS);	 
	}
	catch(Exception e)
	{
		// Best-effort: on failure terms stays null and is handed to fixTerms
		// as-is.  NOTE(review): assumes fixTerms tolerates null -- verify.
		e.printStackTrace();
	}

	//discards OOVs, and tries to map incorrect pos-tags to the correct ones
	return fixTerms(terms, discardStopwords);
}
 
Example 8
Source Project: ADW   Source File: Preprocess.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Runs {@code caseFixer} over every string pair and writes the results to
 * {@code path} (overwriting it), one tab-separated pair per line.  Failures
 * are reported to stderr and otherwise ignored (best-effort, as before).
 *
 * @param pairs the string pairs whose casing should be fixed
 * @param path output file path (truncated before writing)
 */
public static void fixAllCasings(List<Pair<String,String>> pairs, String path)
{
	// Fix: the original leaked the writer if a write threw;
	// try-with-resources closes it on every exit path.
	try (BufferedWriter bw = new BufferedWriter(new FileWriter(path, false)))
	{
		for (Pair<String,String> aPair : pairs)
		{
			Pair<String,String> fixedPair = caseFixer(aPair);
			
			bw.write(fixedPair.first+"\t"+fixedPair.second+"\n");
		}
	}
	catch(Exception e)
	{
		// Preserve original best-effort behavior: log and continue.
		e.printStackTrace();
	}
}
 
Example 9
Source Project: dependensee   Source File: Node.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Breadth-first search from this node over parent and child links, returning
 * the number of edges on the shortest path to {@code n}, or
 * {@code Integer.MAX_VALUE} if {@code n} is unreachable.
 *
 * @param n the target node (compared by identity)
 * @return the path length in edges, or Integer.MAX_VALUE if unreachable
 */
public int getPathLength(Node n) {

        Queue<Pair<Node, Integer>> q = new LinkedList<Pair<Node, Integer>>();
        Set<Node> marked = new HashSet<Node>();
        q.add(new Pair<Node, Integer>(this, 0));
        marked.add(this);
        while (!q.isEmpty()) {
            Pair<Node, Integer> v = q.remove();
            if (v.first == n) {
                return v.second;
            }
            if (v.first.parent != null && !marked.contains(v.first.parent)) {
                q.add(new Pair<Node, Integer>(v.first.parent, v.second + 1));
                marked.add(v.first.parent);
            }
            for (Node node : v.first.children) {
                // Fix: guard with the visited set, as the parent branch does.
                // The original re-enqueued already-marked children, which
                // loops forever if the node graph ever contains a cycle.
                if (!marked.contains(node)) {
                    q.add(new Pair<Node, Integer>(node, v.second + 1));
                    marked.add(node);
                }
            }
        }
        return Integer.MAX_VALUE;
    }
 
Example 10
/**
 * Command-line entry point: loads the Intel semgrex extractor from
 * {@code DIR} and reports its accuracy on {@code TEST_FILE}, optionally
 * writing predictions to the stream named by {@code PREDICTIONS}.
 */
public static void main(String[] args) throws IOException {
    RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
    ArgumentParser.fillOptions(IntelKBPSemgrexExtractor.class, args);
    IntelKBPSemgrexExtractor extractor = new IntelKBPSemgrexExtractor(DIR);
    List<Pair<KBPInput, String>> testExamples = DatasetUtils.readDataset(TEST_FILE);

    // "stdout" routes predictions to System.out; any other name is a file.
    extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(path -> {
        if ("stdout".equalsIgnoreCase(path)) {
            return System.out;
        }
        try {
            return new PrintStream(new FileOutputStream(path));
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }));
}
 
Example 11
/**
 * Evaluates this extractor over a labeled test set, optionally writing each
 * prediction (tab-separated label and formatted confidence) to
 * {@code predictOut}.
 *
 * @param examples pairs of (input, gold relation label)
 * @param predictOut optional sink for one prediction line per example
 * @return the accumulated {@link Accuracy} statistics
 */
default Accuracy computeAccuracy(Stream<Pair<KBPInput, String>> examples,
                                 Optional<PrintStream> predictOut) {
    forceTrack("Accuracy");
    Accuracy accuracy = new Accuracy();
    AtomicInteger testI = new AtomicInteger(0);
    DecimalFormat confidenceFormat = new DecimalFormat("0.0000");
    forceTrack("Featurizing");
    // Classification runs in parallel; the shared accuracy object is updated
    // under its own lock, and output lines are emitted in encounter order
    // via forEachOrdered.
    examples.parallel().map(example -> {
        Pair<String, Double> predicted = this.classify(example.first);
        synchronized (accuracy) {
            accuracy.predict(Collections.singleton(predicted.first), Collections.singleton(example.second));
        }
        // Progress log every 1000 examples.
        if (testI.incrementAndGet() % 1000 == 0) {
            log(IntelKBPRelationExtractor.class, "[" + testI.get() + "]  " + accuracy.toOneLineString());
        }
        return predicted.first + "\t" + confidenceFormat.format(predicted.second);
    })
            .forEachOrdered(line -> {
                if (predictOut.isPresent()) {
                    predictOut.get().println(line);
                }
            });
    endTrack("Featurizing");
    log(accuracy.toString());
    endTrack("Accuracy");
    return accuracy;
}
 
Example 12
/**
 * Command-line entry point: loads the Intel tokensregex extractor from
 * {@code DIR} and reports its accuracy on {@code TEST_FILE}, optionally
 * writing predictions to the stream named by {@code PREDICTIONS}.
 */
public static void main(String[] args) throws IOException {
    RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
    ArgumentParser.fillOptions(IntelKBPTokensregexExtractor.class, args);
    IntelKBPTokensregexExtractor extractor = new IntelKBPTokensregexExtractor(DIR);
    List<Pair<KBPInput, String>> testExamples = DatasetUtils.readDataset(TEST_FILE);

    // "stdout" routes predictions to System.out; any other name is a file.
    extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
        try {
            return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }));

}
 
Example 13
/**
 * High-precision strategy: trust only the tokensregex extractor's prediction;
 * every other ensemble member is skipped.  Falls back to NO_RELATION when no
 * tokensregex extractor is present.
 */
private Pair<String, Double> classifyWithHighPrecision(KBPInput input) {
    for (IntelKBPRelationExtractor extractor : extractors) {
        if (extractor.getClass().equals(IntelKBPTokensregexExtractor.class)) {
            return extractor.classify(input);
        }
    }
    // No tokensregex extractor in the ensemble.
    return Pair.makePair(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION, 1.0);
}
 
Example 14
/**
 * High-recall strategy: any extractor predicting a real relation beats
 * NO_RELATION; among real relations, the most confident vote wins.
 */
private Pair<String, Double> classifyWithHighRecall(KBPInput input) {
    Pair<String, Double> best = Pair.makePair(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION, 1.0);
    for (IntelKBPRelationExtractor extractor : extractors) {
        Pair<String, Double> vote = extractor.classify(input);
        logger.info(extractor.getClass().getSimpleName() + ": " + vote + " for " + input.getObjectText() + " - " + input.getSubjectText());
        if (vote.first.equals(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION)) {
            continue;  // NO_RELATION votes never displace anything.
        }
        boolean bestIsNoRelation = best.first.equals(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION);
        if (bestIsNoRelation || vote.second > best.second) {
            best = vote;
        }
    }
    return best;
}
 
Example 15
/**
 * Unweighted vote: each extractor contributes an equal share
 * (1 / number of extractors) to the relation it predicts; the relation with
 * the largest accumulated tally wins (NO_RELATION counts as a candidate).
 */
private Pair<String, Double> classifyWithVote(KBPInput input) {
    HashMap<String, Double> relation2Weights = new HashMap<>();
    Pair<String, Double> winner = Pair.makePair(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION, 0.0);
    double voteShare = 1.0 / extractors.length;
    for (IntelKBPRelationExtractor extractor : extractors) {
        Pair<String, Double> vote = extractor.classify(input);
        logger.info(extractor.getClass().getSimpleName() + ": " + vote + " for " + input.getObjectText() + " - " + input.getSubjectText());
        Double previous = relation2Weights.get(vote.first);
        double tally = previous == null ? voteShare : previous + voteShare;
        relation2Weights.put(vote.first, tally);
        if (tally > winner.second) {
            winner = Pair.makePair(vote.first, tally);
        }
    }
    return winner;
}
 
Example 16
/**
 * Weighted vote: each extractor's prediction is weighted by its model-specific
 * weight from {@code ModelWeight}; the relation with the largest accumulated
 * tally wins (NO_RELATION counts as a candidate).
 */
private Pair<String, Double> classifyWithWeightedVote(KBPInput input) {
    HashMap<String, Double> relation2Weights = new HashMap<>();
    Pair<String, Double> winner = Pair.makePair(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION, 0.0);
    for (IntelKBPRelationExtractor extractor : extractors) {
        Pair<String, Double> vote = extractor.classify(input);
        logger.info(extractor.getClass().getSimpleName() + ": " + vote + " for " + input.getObjectText() + " - " + input.getSubjectText());
        Double previous = relation2Weights.get(vote.first);
        double tally = previous == null
                ? ModelWeight.getWeight(extractor)
                : previous + ModelWeight.getWeight(extractor);
        relation2Weights.put(vote.first, tally);
        if (tally > winner.second) {
            winner = Pair.makePair(vote.first, tally);
        }
    }
    return winner;
}
 
Example 17
/**
 * Highest-score strategy: keep the running prediction unless it is still
 * NO_RELATION, or the challenger predicts a real relation with a strictly
 * higher confidence.
 */
private Pair<String, Double> classifyWithHighestScore(KBPInput input) {
    Pair<String, Double> best = Pair.makePair(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION, 1.0);
    for (IntelKBPRelationExtractor extractor : extractors) {
        Pair<String, Double> candidate = extractor.classify(input);
        logger.info(extractor.getClass().getSimpleName() + ": " + candidate + " for " + input.getObjectText() + " - " + input.getSubjectText());
        boolean bestIsNoRelation = best.first.equals(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION);
        boolean strongerRealRelation =
                !candidate.first.equals(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION)
                        && candidate.second > best.second;
        if (bestIsNoRelation || strongerRealRelation) {
            best = candidate;
        }
    }
    return best;
}
 
Example 18
/**
 * Default strategy: identical to highest-score selection but without
 * per-extractor logging -- keep the running prediction unless it is still
 * NO_RELATION or the challenger is a more confident real relation.
 */
private Pair<String, Double> classifyDefault(KBPInput input) {
    Pair<String, Double> best = Pair.makePair(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION, 1.0);
    for (IntelKBPRelationExtractor extractor : extractors) {
        Pair<String, Double> candidate = extractor.classify(input);
        boolean bestIsNoRelation = best.first.equals(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION);
        boolean strongerRealRelation =
                !candidate.first.equals(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION)
                        && candidate.second > best.second;
        if (bestIsNoRelation || strongerRealRelation) {
            best = candidate;
        }
    }
    return best;
}
 
Example 19
/**
 * Command-line entry point: deserializes the statistical model, combines it
 * with the tokensregex and semgrex extractors into an ensemble, and reports
 * accuracy on {@code TEST_FILE}, optionally writing predictions to the
 * stream named by {@code PREDICTIONS}.
 */
public static void main(String[] args) throws IOException, ClassNotFoundException {
    RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
    ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPEnsembleExtractor.class, args);

    // The serialized object may be a raw classifier or an already-wrapped
    // extractor; accept both forms.
    Object object = IOUtils.readObjectFromURLOrClasspathOrFileSystem(STATISTICAL_MODEL);
    IntelKBPRelationExtractor statisticalExtractor;
    if (object instanceof LinearClassifier) {
        //noinspection unchecked
        statisticalExtractor = new IntelKBPStatisticalExtractor((Classifier<String, String>) object);
    } else if (object instanceof IntelKBPStatisticalExtractor) {
        statisticalExtractor = (IntelKBPStatisticalExtractor) object;
    } else {
        throw new ClassCastException(object.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
    }
    logger.info("Read statistical model from " + STATISTICAL_MODEL);
    IntelKBPRelationExtractor extractor = new IntelKBPEnsembleExtractor(
            new IntelKBPTokensregexExtractor(TOKENSREGEX_DIR),
            new IntelKBPSemgrexExtractor(SEMGREX_DIR),
            statisticalExtractor
    );

    List<Pair<KBPInput, String>> testExamples = DatasetUtils.readDataset(TEST_FILE);

    // "stdout" routes predictions to System.out; any other name is a file.
    extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
        try {
            return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }));

}
 
Example 20
/**
 * Trains the statistical KBP relation extractor end-to-end: reads the
 * labeled examples in {@code TRAIN_FILE}, featurizes them in parallel into
 * an RVF dataset, trains a multinomial classifier, and serializes the
 * wrapped extractor to {@code MODEL_FILE}.
 *
 * @throws IOException if the training file cannot be read or the model
 *         cannot be written
 */
public static void trainModel() throws IOException {
    forceTrack("Training data");
    List<Pair<KBPInput, String>> trainExamples = DatasetUtils.readDataset(TRAIN_FILE);
    log.info("Read " + trainExamples.size() + " examples");
    log.info("" + trainExamples.stream().map(Pair::second).filter(NO_RELATION::equals).count() + " are " + NO_RELATION);
    endTrack("Training data");

    // Featurize + create the dataset
    forceTrack("Creating dataset");
    RVFDataset<String, String> dataset = new RVFDataset<>();
    final AtomicInteger i = new AtomicInteger(0);
    long beginTime = System.currentTimeMillis();
    // Featurization dominates the cost, so it runs in parallel; writes into
    // the shared dataset are serialized by the synchronized block below.
    trainExamples.stream().parallel().forEach(example -> {
        if (i.incrementAndGet() % 1000 == 0) {
            log.info("[" + Redwood.formatTimeDifference(System.currentTimeMillis() - beginTime) +
                    "] Featurized " + i.get() + " / " + trainExamples.size() + " examples");
        }
        Counter<String> features = features(example.first);  // This takes a while per example
        synchronized (dataset) {
            dataset.add(new RVFDatum<>(features, example.second));
        }
    });
    trainExamples.clear();  // Free up some memory
    endTrack("Creating dataset");

    // Train the classifier
    log.info("Training classifier:");
    Classifier<String, String> classifier = trainMultinomialClassifier(dataset, FEATURE_THRESHOLD, SIGMA);
    dataset.clear();  // Free up some memory

    // Save the classifier
    IOUtils.writeObjectToFile(new IntelKBPStatisticalExtractor(classifier), MODEL_FILE);
}
 
Example 21
/**
 * Command-line entry point: loads the semgrex extractor from {@code DIR} and
 * reports its accuracy on {@code TEST_FILE}, optionally writing predictions
 * to the stream named by {@code PREDICTIONS}.
 */
public static void main(String[] args) throws IOException {
  RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
  ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPSemgrexExtractor.class, args);
  edu.stanford.nlp.ie.KBPSemgrexExtractor extractor = new edu.stanford.nlp.ie.KBPSemgrexExtractor(DIR);
  List<Pair<KBPInput, String>> testExamples = KBPRelationExtractor.readDataset(TEST_FILE);

  // "stdout" routes predictions to System.out; any other name is a file.
  extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
    try {
      return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }));
}
 
Example 22
/**
 * Evaluates this extractor over a labeled test set, optionally writing each
 * prediction (tab-separated label and formatted confidence) to
 * {@code predictOut}.
 *
 * @param examples pairs of (input, gold relation label)
 * @param predictOut optional sink for one prediction line per example
 * @return the accumulated {@link Accuracy} statistics
 */
default Accuracy computeAccuracy(Stream<Pair<KBPInput, String>> examples,
                                 Optional<PrintStream> predictOut) {
  forceTrack("Accuracy");
  Accuracy accuracy = new Accuracy();
  AtomicInteger testI = new AtomicInteger(0);
  DecimalFormat confidenceFormat = new DecimalFormat("0.0000");
  forceTrack("Featurizing");
  // Classification runs in parallel; the shared accuracy object is updated
  // under its own lock, and output lines are emitted in encounter order
  // via forEachOrdered.
  examples.parallel().map(example -> {
    Pair<String, Double> predicted = this.classify(example.first);
    synchronized (accuracy) {
      accuracy.predict(Collections.singleton(predicted.first), Collections.singleton(example.second));
    }
    // Progress log every 1000 examples.
    if (testI.incrementAndGet() % 1000 == 0) {
      log(KBPRelationExtractor.class, "[" + testI.get() + "]  " + accuracy.toOneLineString());
    }
    return predicted.first + "\t" + confidenceFormat.format(predicted.second);
  })
    .forEachOrdered(line -> {
      if (predictOut.isPresent()) {
        predictOut.get().println(line);
      }
    });
  endTrack("Featurizing");
  log(accuracy.toString());
  endTrack("Accuracy");
  return accuracy;
}
 
Example 23
/**
 * Command-line entry point: loads the tokensregex extractor from {@code DIR}
 * and reports its accuracy on {@code TEST_FILE}, optionally writing
 * predictions to the stream named by {@code PREDICTIONS}.
 */
public static void main(String[] args) throws IOException {
  RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
  ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPTokensregexExtractor.class, args);
  edu.stanford.nlp.ie.KBPTokensregexExtractor extractor = new edu.stanford.nlp.ie.KBPTokensregexExtractor(DIR);
  List<Pair<KBPInput, String>> testExamples = KBPRelationExtractor.readDataset(TEST_FILE);

  // "stdout" routes predictions to System.out; any other name is a file.
  extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
    try {
      return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }));

}
 
Example 24
/**
 * Ensemble classification: poll each extractor in order, keeping the current
 * prediction unless it is still NO_RELATION or the challenger predicts a
 * real relation with strictly higher confidence.
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
  Pair<String, Double> best = Pair.makePair(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION, 1.0);
  for (edu.stanford.nlp.ie.KBPRelationExtractor extractor : extractors) {
    Pair<String, Double> candidate = extractor.classify(input);
    boolean bestIsNoRelation = best.first.equals(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION);
    boolean strongerRealRelation =
        !candidate.first.equals(edu.stanford.nlp.ie.KBPRelationExtractor.NO_RELATION)
            && candidate.second > best.second;
    if (bestIsNoRelation || strongerRealRelation) {
      best = candidate;
    }
  }
  return best;
}
 
Example 25
/**
 * Command-line entry point: deserializes the statistical model, combines it
 * with the tokensregex and semgrex extractors into an ensemble, and reports
 * accuracy on {@code TEST_FILE}, optionally writing predictions to the
 * stream named by {@code PREDICTIONS}.
 */
public static void main(String[] args) throws IOException, ClassNotFoundException {
  RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
  ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPEnsembleExtractor.class, args);

  // The serialized object may be a raw classifier or an already-wrapped
  // extractor; accept both forms.
  Object object = IOUtils.readObjectFromURLOrClasspathOrFileSystem(STATISTICAL_MODEL);
  edu.stanford.nlp.ie.KBPRelationExtractor statisticalExtractor;
  if (object instanceof LinearClassifier) {
    //noinspection unchecked
    statisticalExtractor = new IntelKBPStatisticalExtractor((Classifier<String, String>) object);
  } else if (object instanceof IntelKBPStatisticalExtractor) {
    statisticalExtractor = (IntelKBPStatisticalExtractor) object;
  } else {
    throw new ClassCastException(object.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
  }
  logger.info("Read statistical model from " + STATISTICAL_MODEL);
  edu.stanford.nlp.ie.KBPRelationExtractor extractor = new edu.stanford.nlp.ie.KBPEnsembleExtractor(
      new IntelKBPTokensregexExtractor(TOKENSREGEX_DIR),
      new IntelKBPSemgrexExtractor(SEMGREX_DIR),
      statisticalExtractor
  );

  List<Pair<KBPInput, String>> testExamples = KBPRelationExtractor.readDataset(TEST_FILE);

  // "stdout" routes predictions to System.out; any other name is a file.
  extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
    try {
      return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }));

}
 
Example 26
Source Project: phrasal   Source File: MakeWordClasses.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Create the input to a clustering iteration: the slice of the effective
 * vocabulary that one worker thread should process within one partition.
 *
 * @param partitionNumber index of the vocabulary partition being sliced
 * @param threadId id of the requesting worker (used only in log messages)
 * @param inputStart 0 to begin at the partition boundary; otherwise the
 *        absolute vocabulary index at which this slice starts
 * @return the clusterer state paired with the slice's end index, or null
 *         when the slice would be empty
 */
private Pair<ClustererState,Integer> createInput(int partitionNumber, int threadId, int inputStart) {
  // Partition bounds; the last partition absorbs the division remainder.
  int partitionSize = effectiveVocabulary.size() / vparts;
  int partitionStart = partitionNumber*partitionSize;
  int partitionEnd = partitionNumber == vparts-1 ? effectiveVocabulary.size() : (partitionNumber+1)*partitionSize;
  partitionSize = partitionEnd-partitionStart;

  int targetInputSize = partitionSize / numThreads;
  // inputStart == 0 means "begin at the partition boundary".
  int startIndex = inputStart == 0 ? partitionStart + inputStart : inputStart;
  int endIndex = Math.min(partitionEnd, startIndex + targetInputSize);
  if (endIndex - startIndex <= 0) return null;
  
  // Brants and Uszkoreit heuristic: make sure that all words from a given class
  // end up in the same worker.
  int i = endIndex-1;
  for (; i < partitionEnd-1; ++i) {
    IString iWord = effectiveVocabulary.get(i);
    IString nextWord = effectiveVocabulary.get(i+1);
    int iClass= wordToClass.get(iWord);
    int nextClass= wordToClass.get(nextWord);
    if (iClass != nextClass) {
      break;
    }
  }
  logger.info(String.format("endIndex: %d -> %d", endIndex, i+1));
  endIndex = i+1;

  List<IString> inputVocab = effectiveVocabulary.subList(startIndex, endIndex);
  
  logger.info(String.format("Partition %d thread %d size %d: input %d-%d", partitionNumber,
      threadId, inputVocab.size(), startIndex, endIndex-1));
  
  // Create the state
  ClustererState state =  new ClustererState(inputVocab, this.wordCount, 
      this.historyCount, this.wordToClass, this.classCount, this.classHistoryCount,
      numClasses, this.currentObjectiveValue);
  return new Pair<ClustererState,Integer>(state, endIndex);
}
 
Example 27
Source Project: phrasal   Source File: MERT.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Prints up to the 100 largest-magnitude weights, one "name value" pair per
 * line, in descending magnitude order.
 */
static void displayWeights(Counter<String> wts) {
    List<Pair<String,Double>> sorted = Counters.toDescendingMagnitudeSortedListWithCounts(wts);
    int limit = Math.min(sorted.size(), 100);
    for (Pair<String, Double> entry : sorted.subList(0, limit)) {
      System.out.printf("%s %g\n", entry.first, entry.second);
    }
  }
 
Example 28
Source Project: phrasal   Source File: BLEUMetric.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Memoized wrapper around {@code computeLocalSmoothScore}: results are cached
 * by (position, n-best id).  The cache is bypassed when caching is disabled
 * or the n-best id is negative.
 */
private double getLocalSmoothScore(Sequence<TK> seq, int pos, int nbestId) {
  if (!enableCache || nbestId < 0) {
    return computeLocalSmoothScore(seq, pos);
  }
  Pair<Integer, Integer> key = new Pair<Integer, Integer>(pos, nbestId);
  Double score = smoothScoreCache.get(key);
  if (score != null) {
    return score;
  }
  score = computeLocalSmoothScore(seq, pos);
  smoothScoreCache.put(key, score);
  return score;
}
 
Example 29
/**
 * Walks {@code tree} breadth-first and, for every internal node with fewer
 * than 8 children, reorders its children according to the best-scoring
 * permutation found by {@code search}; then returns the reordered surface
 * string.  Nodes with 8+ children are left untouched (the permutation search
 * cost grows with the factorial of the child count).
 *
 * @param tree tree whose node labels are {@link IndexedWord}s; mutated in place
 * @return the space-joined yield of the (possibly reordered) tree
 */
private static String preorder(Tree tree) {
  
  List<Tree> queue = new LinkedList<>();
  queue.add(tree);
  
  while ( ! queue.isEmpty()) {
    Tree currentNode = queue.remove(0);
    
    if (currentNode.isLeaf())
      continue;
    
    // Fix: Java-style array declaration (was C-style "Tree children[]").
    Tree[] children = currentNode.children();
    int childCount = children.length;
    IndexedWord hw = (IndexedWord) currentNode.label();
    List<FeatureNode> featureNodes = new ArrayList<>(childCount);
    for (int i = 0; i < childCount; i++) {
      featureNodes.add(new FeatureNode(children[i], hw));
      queue.add(children[i]);
    }
    if (childCount < 8) {
      // Search for the best-scoring permutation of this node's children.
      Pair<Double, List<Integer>> result = search(featureNodes, new LinkedList<Integer>(), Double.NEGATIVE_INFINITY);
      if (result != null) {
        // permutation.get(i) is the new position of original child i.
        List<Integer> permutation = result.second;
        List<Tree> newChildren = new ArrayList<>(Arrays.asList(children));
        for (int i = 0; i < childCount; i++) {
          int idx = permutation.get(i);
          newChildren.set(idx, children[i]);
        }
        currentNode.setChildren(newChildren);
      } else {
        System.err.println("Warning: No path found.");
      }
    }
  }
  
  return StringUtils.join(tree.yieldWords());
}
 
Example 30
/**
 * Builds a shallow tree from a dependency map whose artificial root lives at
 * index 0.  Returns an empty tree node when the root is absent or has no
 * children; otherwise delegates to generateSubTree starting at the root's
 * first child.
 */
private static Tree generateShallowTree(HashMap<Integer, Pair<IndexedWord, List<Integer>>> dependencies) {
  Pair<IndexedWord, List<Integer>> root = dependencies.get(0);
  if (root == null || root.second.isEmpty()) {
    return new LabeledScoredTreeNode();
  }
  return generateSubTree(dependencies, root.second.get(0));
}