Java Code Examples for edu.stanford.nlp.io.IOUtils

The following examples show how to use edu.stanford.nlp.io.IOUtils. They are extracted from open source projects. You can vote up the examples you like or vote down the ones you don't like, and follow the links above each example to go to the original project or source file. You may also check out the related API usage in the sidebar.
Example 1
/**
 * A debugging method to try relation extraction from the console.
 *
 * @throws IOException If the console cannot be read.
 */
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  // Read lines from stdin until EOF; annotate each one and dump the results.
  IOUtils.console("sentence> ", input -> {
    Annotation annotation = new Annotation(input);
    pipeline.annotate(annotation);
    for (CoreMap sent : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      // Extracted KBP triples go to stderr, the sentence itself to stdout.
      sent.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(System.err::println);
      System.out.println(sent);
    }
  });
}
 
Example 2
/**
 * Loads the serialized statistical KBP model from {@code model}, accepting any of the
 * three historical on-disk formats and normalizing it into a
 * {@link DefaultKBPStatisticalExtractor}.
 *
 * @return The deserialized statistical extractor.
 * @throws IOException If the model cannot be read.
 * @throws ClassNotFoundException If the serialized class is unknown.
 */
public static IntelKBPRelationExtractor loadStatisticalExtractor() throws IOException, ClassNotFoundException {
    log.info("Loading KBP classifier from " + model);
    Object loaded = IOUtils.readObjectFromURLOrClasspathOrFileSystem(model);
    // Oldest format: a bare classifier -- wrap it.
    if (loaded instanceof LinearClassifier) {
        //noinspection unchecked
        return new DefaultKBPStatisticalExtractor((Classifier<String, String>) loaded);
    }
    // Current format: already the extractor we want.
    if (loaded instanceof DefaultKBPStatisticalExtractor) {
        return (DefaultKBPStatisticalExtractor) loaded;
    }
    // Stanford's own extractor: re-wrap its classifier.
    if (loaded instanceof edu.stanford.nlp.ie.KBPStatisticalExtractor) {
        edu.stanford.nlp.ie.KBPStatisticalExtractor stanford = (edu.stanford.nlp.ie.KBPStatisticalExtractor) loaded;
        return new DefaultKBPStatisticalExtractor(stanford.classifier);
    }
    throw new ClassCastException(loaded.getClass() + " cannot be cast into a " + DefaultKBPStatisticalExtractor.class);
}
 
Example 3
/**
 * Builds one TokensRegex extractor per relation type for which a rule file
 * exists under the given directory.
 *
 * @param tokensregexDir Directory holding {@code defs.rules} plus one
 *                       {@code <relation>.rules} file per relation.
 */
public IntelKBPTokensregexExtractor(String tokensregexDir) {
    logger.log("Creating TokensRegexExtractor");
    // Create extractors        
    for (RelationType rel : RelationType.values()) {
        // Former-title rules are only loaded when the config opts in.
        if (!IntelConfig.bSeprateFormerTitle && rel == RelationType.PER_FORMER_TITLE) {
            continue;
        }
        String rulePath = tokensregexDir + File.separator + rel.canonicalName.replaceAll("/", "SLASH") + ".rules";
        if (!IOUtils.existsInClasspathOrFileSystem(rulePath)) {
            continue;  // no rules for this relation; skip it silently
        }
        List<String> ruleFiles = new ArrayList<>();
        ruleFiles.add(tokensregexDir + File.separator + "defs.rules");  // shared macro definitions
        ruleFiles.add(rulePath);
        logger.log("Rule files for relation " + rel + " is " + rulePath);
        Env env = TokenSequencePattern.getNewEnv();
        env.bind("collapseExtractionRules", true);
        CoreMapExpressionExtractor extractor = CoreMapExpressionExtractor.createExtractorFromFiles(env, ruleFiles).keepTemporaryTags();
        rules.put(rel, extractor);
    }
}
 
Example 4
/**
 * Loads the serialized statistical KBP model from {@code MODEL}, accepting any of the
 * three historical on-disk formats and normalizing it into an
 * {@link IntelKBPStatisticalExtractor}.
 *
 * @return The deserialized statistical extractor.
 * @throws IOException If the model cannot be read.
 * @throws ClassNotFoundException If the serialized class is unknown.
 */
public static IntelKBPRelationExtractor loadStatisticalExtractor() throws IOException, ClassNotFoundException {
    log.info("Loading KBP classifier from " + MODEL);
    Object loaded = edu.stanford.nlp.io.IOUtils.readObjectFromURLOrClasspathOrFileSystem(MODEL);
    // Oldest format: a bare classifier -- wrap it.
    if (loaded instanceof LinearClassifier) {
        //noinspection unchecked
        return new IntelKBPStatisticalExtractor((Classifier<String, String>) loaded);
    }
    // Current format: already the extractor we want.
    if (loaded instanceof IntelKBPStatisticalExtractor) {
        return (IntelKBPStatisticalExtractor) loaded;
    }
    // Stanford's own extractor: re-wrap its classifier.
    if (loaded instanceof edu.stanford.nlp.ie.KBPStatisticalExtractor) {
        edu.stanford.nlp.ie.KBPStatisticalExtractor stanford = (edu.stanford.nlp.ie.KBPStatisticalExtractor) loaded;
        return new IntelKBPStatisticalExtractor(stanford.classifier);
    }
    throw new ClassCastException(loaded.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
}
 
Example 5
/**
 * A debugging console that echoes only a whitelisted set of KBP relations.
 *
 * @throws IOException If the console cannot be read.
 */
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
    props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
    // Only these relation types are echoed back.
    Set<String> interested = Stream.of("per:title", "per:employee_of", "org:top_members/employees").collect(Collectors.toSet());
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    IOUtils.console("sentence> ", input -> {
        Annotation annotation = new Annotation(input);
        pipeline.annotate(annotation);
        for (CoreMap sent : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            sent.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(triple -> {
                if (interested.contains(triple.relationGloss())) {
                    System.err.println(triple);
                }
            });
        }
    });
}
 
Example 6
/**
 * Builds one TokensRegex extractor per relation type for which a rule file
 * exists under the given directory.
 *
 * @param tokensregexDir Directory holding {@code defs.rules} plus one
 *                       {@code <relation>.rules} file per relation.
 */
public KBPTokensregexExtractor(String tokensregexDir) {
  logger.log("Creating TokensRegexExtractor");
  // Create extractors
  for (RelationType rel : RelationType.values()) {
    String rulePath = tokensregexDir + File.separator + rel.canonicalName.replaceAll("/", "SLASH") + ".rules";
    if (!IOUtils.existsInClasspathOrFileSystem(rulePath)) {
      continue;  // no rules for this relation; skip it silently
    }
    List<String> ruleFiles = new ArrayList<>();
    ruleFiles.add(tokensregexDir + File.separator + "defs.rules");  // shared macro definitions
    ruleFiles.add(rulePath);
    logger.log("Rule files for relation " + rel + " is " + rulePath);
    Env env = TokenSequencePattern.getNewEnv();
    env.bind("collapseExtractionRules", true);
    CoreMapExpressionExtractor extractor = CoreMapExpressionExtractor.createExtractorFromFiles(env, ruleFiles).keepTemporaryTags();
    rules.put(rel, extractor);
  }
}
 
Example 7
/**
 * Runs an interactive shell where input text is processed with the given pipeline.
 *
 * @param pipeline The pipeline to be used
 * @throws IOException If IO problem with stdin
 */
/**
 * Runs an interactive shell where input text is processed with the given pipeline.
 *
 * @param pipeline The pipeline to be used
 * @throws IOException If IO problem with stdin
 */
private static void shell(MultiLangsStanfordCoreNLPClient pipeline) throws IOException {
  log.info("Entering interactive shell. Type q RETURN or EOF to quit.");
  // Output format defaults to plain text unless overridden via the "outputFormat" property.
  final StanfordCoreNLP.OutputFormat outputFormat = StanfordCoreNLP.OutputFormat.valueOf(pipeline.properties.getProperty("outputFormat", "text").toUpperCase());
  IOUtils.console("NLP> ", line -> {
    if ( ! line.isEmpty()) {
      Annotation anno = pipeline.process(line);
      try {
        // Render the annotation on stdout in the requested format.
        switch (outputFormat) {
          case XML:
            new XMLOutputter().print(anno, System.out);
            break;
          case JSON:
            new JSONOutputter().print(anno, System.out);
            System.out.println();  // JSON output has no trailing newline of its own
            break;
          case CONLL:
            new CoNLLOutputter().print(anno, System.out);
            System.out.println();  // CoNLL output has no trailing newline of its own
            break;
          case TEXT:
            new TextOutputter().print(anno, System.out);
            break;
          case SERIALIZED:
            // Serialized bytes are useless on a terminal; degrade to text.
            warn("You probably cannot read the serialized output, so printing in text instead");
            new TextOutputter().print(anno, System.out);
            break;
          default:
            throw new IllegalArgumentException("Cannot output in format " + outputFormat + " from the interactive shell");
        }
      } catch (IOException e) {
        // Outputters throw checked IOException; rethrow unchecked inside the lambda.
        throw new RuntimeIOException(e);
      }
    }
  });
}
 
Example 8
Source Project: wiseowl   Source File: DocumentFrequencyCounter.java   License: MIT License
/**
 * Computes a result, or throws an exception if unable to do so.
 *
 * @return computed result
 * @throws Exception if unable to compute a result
 */
public Counter<String> call() throws Exception {
  // Gigaword source files lack a single enclosing root element, so wrap the
  // raw contents in a synthetic <docs> tag before handing them to the parser.
  String wrapped = "<docs>" + IOUtils.slurpFile(file) + "</docs>";
  return getIDFMapForFile(new StringReader(wrapped));
}
 
Example 9
/**
 * Interactive NER console: prints the entities of each input line as a
 * comma-terminated list.
 *
 * @throws IOException If the console cannot be read.
 */
public static void main(String[] args) throws IOException {
    IOUtils.console("sentence> ", input -> {
        for (String entity : extractNER(input)) {
            System.out.print(entity + ",");
        }
        System.out.println();
    });
}
 
Example 10
/**
 * Interactive console: prints every relation triple extracted from each
 * input line.
 *
 * @throws IOException If the console cannot be read.
 */
public static void main(String[] args) throws IOException {
    IOUtils.console("sentence> ", input -> {
        HashMap<RelationTriple, String> triples = extract(input);
        triples.keySet().forEach(System.out::println);
    });
}
 
Example 11
/**
 * A debugging method to try relation extraction from the console.
 *
 * @throws IOException If the console cannot be read.
 */
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
    props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    // For each console line, annotate it and dump all KBP triples to stderr.
    IOUtils.console("sentence> ", input -> {
        Annotation annotation = new Annotation(input);
        pipeline.annotate(annotation);
        for (CoreMap sent : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            sent.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(System.err::println);
        }
    });
}
 
Example 12
/**
 * Loads the Semgrex rule files for every relation type found under the given
 * directory.
 *
 * @param semgrexdir Directory containing one {@code <relation>.rules} file per relation.
 * @throws IOException If a rule file exists but cannot be read.
 */
public IntelKBPSemgrexExtractor(String semgrexdir) throws IOException {
    logger.log("Creating SemgrexRegexExtractor");
    // Create extractors
    for (RelationType rel : RelationType.values()) {
        String filename = semgrexdir + File.separator + rel.canonicalName.replace("/", "SLASH") + ".rules";
        if (IOUtils.existsInClasspathOrFileSystem(filename)) {
            // try-with-resources: the original code leaked this stream.
            try (java.io.InputStream in = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(filename)) {
                List<SemgrexPattern> rulesforrel = SemgrexBatchParser.compileStream(in);
                logger.log("Read " + rulesforrel.size() + " rules from " + filename + " for relation " + rel);
                rules.put(rel, rulesforrel);
            }
        }
    }
}
 
Example 13
/**
 * Interactive console: prints every relation triple extracted from each
 * input line.
 *
 * @throws IOException If the console cannot be read.
 */
public static void main(String[] args) throws IOException {
    IOUtils.console("sentence> ", input -> {
        for (RelationTriple triple : extract(input).keySet()) {
            System.out.println(triple);
        }
    });
}
 
Example 14
public static void main(String[] args) throws IOException, ClassNotFoundException {
    RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
    // Fill the static @ArgumentParser.Option fields (model paths etc.) from the CLI args.
    ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPEnsembleExtractor.class, args);

    // The serialized model may be a bare classifier or an already-wrapped extractor.
    Object object = IOUtils.readObjectFromURLOrClasspathOrFileSystem(STATISTICAL_MODEL);
    IntelKBPRelationExtractor statisticalExtractor;
    if (object instanceof LinearClassifier) {
        //noinspection unchecked
        statisticalExtractor = new IntelKBPStatisticalExtractor((Classifier<String, String>) object);
    } else if (object instanceof IntelKBPStatisticalExtractor) {
        statisticalExtractor = (IntelKBPStatisticalExtractor) object;
    } else {
        throw new ClassCastException(object.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
    }
    logger.info("Read statistical model from " + STATISTICAL_MODEL);
    // Ensemble: tokensregex rules, semgrex rules, then the statistical model.
    IntelKBPRelationExtractor extractor = new IntelKBPEnsembleExtractor(
            new IntelKBPTokensregexExtractor(TOKENSREGEX_DIR),
            new IntelKBPSemgrexExtractor(SEMGREX_DIR),
            statisticalExtractor
    );

    List<Pair<KBPInput, String>> testExamples = DatasetUtils.readDataset(TEST_FILE);

    // PREDICTIONS is optional; when present, open either stdout or the named file
    // as the destination for per-example predictions.
    extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
        try {
            return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }));

}
 
Example 15
/**
 * Trains the statistical KBP extractor: reads the training set, featurizes it
 * in parallel, fits a multinomial classifier, and serializes the wrapped
 * extractor to MODEL_FILE.
 *
 * @throws IOException If the training data cannot be read or the model cannot be written.
 */
public static void trainModel() throws IOException {
    forceTrack("Training data");
    List<Pair<KBPInput, String>> trainExamples = DatasetUtils.readDataset(TRAIN_FILE);
    log.info("Read " + trainExamples.size() + " examples");
    log.info("" + trainExamples.stream().map(Pair::second).filter(NO_RELATION::equals).count() + " are " + NO_RELATION);
    endTrack("Training data");

    // Featurize + create the dataset
    forceTrack("Creating dataset");
    RVFDataset<String, String> dataset = new RVFDataset<>();
    final AtomicInteger i = new AtomicInteger(0);  // progress counter shared across worker threads
    long beginTime = System.currentTimeMillis();
    trainExamples.stream().parallel().forEach(example -> {
        if (i.incrementAndGet() % 1000 == 0) {
            log.info("[" + Redwood.formatTimeDifference(System.currentTimeMillis() - beginTime) +
                    "] Featurized " + i.get() + " / " + trainExamples.size() + " examples");
        }
        Counter<String> features = features(example.first);  // This takes a while per example
        // RVFDataset is not thread-safe, so adds are serialized while
        // featurization itself runs in parallel.
        synchronized (dataset) {
            dataset.add(new RVFDatum<>(features, example.second));
        }
    });
    trainExamples.clear();  // Free up some memory
    endTrack("Creating dataset");

    // Train the classifier
    log.info("Training classifier:");
    Classifier<String, String> classifier = trainMultinomialClassifier(dataset, FEATURE_THRESHOLD, SIGMA);
    dataset.clear();  // Free up some memory

    // Save the classifier
    IOUtils.writeObjectToFile(new IntelKBPStatisticalExtractor(classifier), MODEL_FILE);
}
 
Example 16
/**
 * Loads the Semgrex rule files for every relation type found under the given
 * directory.
 *
 * @param semgrexdir Directory containing one {@code <relation>.rules} file per relation.
 * @throws IOException If a rule file exists but cannot be read.
 */
public KBPSemgrexExtractor(String semgrexdir) throws IOException {
  logger.log("Creating SemgrexRegexExtractor");
  // Create extractors
  for (RelationType rel : RelationType.values()) {
    String filename = semgrexdir + File.separator + rel.canonicalName.replace("/", "SLASH") + ".rules";
    if (IOUtils.existsInClasspathOrFileSystem(filename)) {
      // try-with-resources: the original code leaked this stream.
      try (java.io.InputStream in = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(filename)) {
        List<SemgrexPattern> rulesforrel = SemgrexBatchParser.compileStream(in);
        logger.log("Read " + rulesforrel.size() + " rules from " + filename + " for relation " + rel);
        rules.put(rel, rulesforrel);
      }
    }
  }
}
 
Example 17
public static void main(String[] args) throws IOException, ClassNotFoundException {
  RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
  // Fill the static @ArgumentParser.Option fields (model paths etc.) from the CLI args.
  ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPEnsembleExtractor.class, args);

  // The serialized model may be a bare classifier or an already-wrapped extractor.
  Object object = IOUtils.readObjectFromURLOrClasspathOrFileSystem(STATISTICAL_MODEL);
  edu.stanford.nlp.ie.KBPRelationExtractor statisticalExtractor;
  if (object instanceof LinearClassifier) {
    //noinspection unchecked
    statisticalExtractor = new IntelKBPStatisticalExtractor((Classifier<String, String>) object);
  } else if (object instanceof IntelKBPStatisticalExtractor) {
    statisticalExtractor = (IntelKBPStatisticalExtractor) object;
  } else {
    throw new ClassCastException(object.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
  }
  logger.info("Read statistical model from " + STATISTICAL_MODEL);
  // Ensemble: tokensregex rules, semgrex rules, then the statistical model.
  edu.stanford.nlp.ie.KBPRelationExtractor extractor = new edu.stanford.nlp.ie.KBPEnsembleExtractor(
      new IntelKBPTokensregexExtractor(TOKENSREGEX_DIR),
      new IntelKBPSemgrexExtractor(SEMGREX_DIR),
      statisticalExtractor
  );

  List<Pair<KBPInput, String>> testExamples = KBPRelationExtractor.readDataset(TEST_FILE);

  // PREDICTIONS is optional; when present, open either stdout or the named file
  // as the destination for per-example predictions.
  extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
    try {
      return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }));

}
 
Example 18
Source Project: jstarcraft-nlp   Source File: CoreNlpTokenizer.java   License: Apache License 2.0
/**
 * Reads the whole input reader, runs the pipeline over it, and resets the
 * sentence iterator to the new annotation's sentences.
 *
 * @return always {@code true}
 */
private boolean processInput() {
    String text = IOUtils.slurpReader(input);
    Annotation annotation = new Annotation(text);
    pipeline.annotate(annotation);
    sentences = annotation.get(SentencesAnnotation.class).iterator();
    return true;
}