edu.stanford.nlp.io.IOUtils Java Examples

The following examples show how to use edu.stanford.nlp.io.IOUtils, the I/O utility class from Stanford CoreNLP. They are taken from open-source projects; the source file and project license are listed above each example.
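Before the examples, here is a minimal, self-contained sketch of the IOUtils calls that recur below (existsInClasspathOrFileSystem, getInputStreamFromURLOrClasspathOrFileSystem, slurpReader, and console). The resource path and class name are hypothetical placeholders; the sketch only assumes Stanford CoreNLP is on the classpath.

import edu.stanford.nlp.io.IOUtils;

import java.io.IOException;
import java.io.InputStreamReader;

public class IOUtilsSketch {
  public static void main(String[] args) throws IOException {
    // Hypothetical resource name; existsInClasspathOrFileSystem checks both the
    // classpath and the local file system.
    String path = "edu/stanford/nlp/models/kbp/example.rules";
    if (IOUtils.existsInClasspathOrFileSystem(path)) {
      // Open the resource wherever it lives and read it fully into a String.
      String contents = IOUtils.slurpReader(new InputStreamReader(
          IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(path)));
      System.out.println("Read " + contents.length() + " characters from " + path);
    }

    // A minimal read-eval-print loop over stdin; several examples below use
    // the same pattern for interactive testing.
    IOUtils.console("text> ", line -> System.out.println("echo: " + line));
  }
}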
Example #1
Source File: KBPTokensregexExtractor.java    From InformationExtraction with GNU General Public License v3.0
public KBPTokensregexExtractor(String tokensregexDir) {
  logger.log("Creating TokensRegexExtractor");
  // Create extractors
  for (RelationType rel : RelationType.values()) {
    String path = tokensregexDir + File.separator + rel.canonicalName.replaceAll("/", "SLASH") + ".rules";
    if (IOUtils.existsInClasspathOrFileSystem(path)) {
      List<String> listFiles = new ArrayList<>();
      listFiles.add(tokensregexDir + File.separator + "defs.rules");
      listFiles.add(path);
      logger.log("Rule files for relation " + rel + " is " + path);
      Env env = TokenSequencePattern.getNewEnv();
      env.bind("collapseExtractionRules", true);
      CoreMapExpressionExtractor extr = CoreMapExpressionExtractor.createExtractorFromFiles(env, listFiles).keepTemporaryTags();
      rules.put(rel, extr);
    }
  }
}
 
Example #2
Source File: KBPTest.java    From InformationExtraction with GNU General Public License v3.0
/**
 * A debugging method to try relation extraction from the console.
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  IOUtils.console("sentence> ", line -> {
    Annotation ann = new Annotation(line);
    pipeline.annotate(ann);
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(System.err::println);
      System.out.println(sentence);
    }
  });
}
 
Example #3
Source File: InteractiveDriver.java    From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
    props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
    Set<String> interested = Stream.of("per:title", "per:employee_of", "org:top_members/employees").collect(Collectors.toSet());
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    IOUtils.console("sentence> ", line -> {
        Annotation ann = new Annotation(line);
        pipeline.annotate(ann);
        for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
            sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(r -> {
                String relation = r.relationGloss();
                if (interested.contains(relation)) {
                    System.err.println(r);
                }
            });
        }
    });
}
 
Example #4
Source File: IntelKBPStatisticalExtractor.java    From InformationExtraction with GNU General Public License v3.0
public static IntelKBPRelationExtractor loadStatisticalExtractor() throws IOException, ClassNotFoundException {
    log.info("Loading KBP classifier from " + MODEL);
    Object object = edu.stanford.nlp.io.IOUtils.readObjectFromURLOrClasspathOrFileSystem(MODEL);
    IntelKBPRelationExtractor statisticalExtractor;
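    // The serialized model may be a bare LinearClassifier, an already-wrapped
    // IntelKBPStatisticalExtractor, or a stock CoreNLP KBPStatisticalExtractor;
    // each case ends up as an IntelKBPStatisticalExtractor below.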
    if (object instanceof LinearClassifier) {
        //noinspection unchecked
        statisticalExtractor = new IntelKBPStatisticalExtractor((Classifier<String, String>) object);
    } else if (object instanceof IntelKBPStatisticalExtractor) {
        statisticalExtractor = (IntelKBPStatisticalExtractor) object;
    } else if (object instanceof edu.stanford.nlp.ie.KBPStatisticalExtractor) {
        edu.stanford.nlp.ie.KBPStatisticalExtractor kbp = (edu.stanford.nlp.ie.KBPStatisticalExtractor) object;
        statisticalExtractor = new IntelKBPStatisticalExtractor(kbp.classifier);
    } else {
        throw new ClassCastException(object.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
    }
    return statisticalExtractor;
}
 
Example #5
Source File: DefaultKBPStatisticalExtractor.java    From InformationExtraction with GNU General Public License v3.0
public static IntelKBPRelationExtractor loadStatisticalExtractor() throws IOException, ClassNotFoundException {
    log.info("Loading KBP classifier from " + model);
    Object object = IOUtils.readObjectFromURLOrClasspathOrFileSystem(model);
    IntelKBPRelationExtractor statisticalExtractor;
    if (object instanceof LinearClassifier) {
        //noinspection unchecked
        statisticalExtractor = new DefaultKBPStatisticalExtractor((Classifier<String, String>) object);
    } else if (object instanceof DefaultKBPStatisticalExtractor) {
        statisticalExtractor = (DefaultKBPStatisticalExtractor) object;
    } else if (object instanceof edu.stanford.nlp.ie.KBPStatisticalExtractor) {
        edu.stanford.nlp.ie.KBPStatisticalExtractor kbp = (edu.stanford.nlp.ie.KBPStatisticalExtractor) object;
        statisticalExtractor = new DefaultKBPStatisticalExtractor(kbp.classifier);
    } else {
        throw new ClassCastException(object.getClass() + " cannot be cast into a " + DefaultKBPStatisticalExtractor.class);
    }
    return statisticalExtractor;
}
 
Example #6
Source File: IntelKBPTokensregexExtractor.java    From InformationExtraction with GNU General Public License v3.0
public IntelKBPTokensregexExtractor(String tokensregexDir) {
    logger.log("Creating TokensRegexExtractor");
    // Create extractors        
    for (RelationType rel : RelationType.values()) {
        if (IntelConfig.bSeprateFormerTitle || rel != RelationType.PER_FORMER_TITLE) {
            String path = tokensregexDir + File.separator + rel.canonicalName.replaceAll("/", "SLASH") + ".rules";
            if (IOUtils.existsInClasspathOrFileSystem(path)) {
                List<String> listFiles = new ArrayList<>();
                listFiles.add(tokensregexDir + File.separator + "defs.rules");
                listFiles.add(path);
                logger.log("Rule files for relation " + rel + " is " + path);
                Env env = TokenSequencePattern.getNewEnv();
                env.bind("collapseExtractionRules", true);
                CoreMapExpressionExtractor extr = CoreMapExpressionExtractor.createExtractorFromFiles(env, listFiles).keepTemporaryTags();
                rules.put(rel, extr);
            }
        }
    }
}
 
Example #7
Source File: KBPEnsembleExtractor.java    From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException, ClassNotFoundException {
  RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
  ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPEnsembleExtractor.class, args);

  Object object = IOUtils.readObjectFromURLOrClasspathOrFileSystem(STATISTICAL_MODEL);
  edu.stanford.nlp.ie.KBPRelationExtractor statisticalExtractor;
  if (object instanceof LinearClassifier) {
    //noinspection unchecked
    statisticalExtractor = new IntelKBPStatisticalExtractor((Classifier<String, String>) object);
  } else if (object instanceof IntelKBPStatisticalExtractor) {
    statisticalExtractor = (IntelKBPStatisticalExtractor) object;
  } else {
    throw new ClassCastException(object.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
  }
  logger.info("Read statistical model from " + STATISTICAL_MODEL);
  edu.stanford.nlp.ie.KBPRelationExtractor extractor = new edu.stanford.nlp.ie.KBPEnsembleExtractor(
      new IntelKBPTokensregexExtractor(TOKENSREGEX_DIR),
      new IntelKBPSemgrexExtractor(SEMGREX_DIR),
      statisticalExtractor
  );

  List<Pair<KBPInput, String>> testExamples = KBPRelationExtractor.readDataset(TEST_FILE);

  extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
    try {
      return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }));

}
 
Example #8
Source File: MultiLangsStanfordCoreNLPClient.java    From blog-codes with Apache License 2.0
/**
 * Runs an interactive shell where input text is processed with the given pipeline.
 *
 * @param pipeline The pipeline to be used
 * @throws IOException If IO problem with stdin
 */
private static void shell(MultiLangsStanfordCoreNLPClient pipeline) throws IOException {
  log.info("Entering interactive shell. Type q RETURN or EOF to quit.");
  final StanfordCoreNLP.OutputFormat outputFormat = StanfordCoreNLP.OutputFormat.valueOf(pipeline.properties.getProperty("outputFormat", "text").toUpperCase());
  IOUtils.console("NLP> ", line -> {
    if ( ! line.isEmpty()) {
      Annotation anno = pipeline.process(line);
      try {
        switch (outputFormat) {
          case XML:
            new XMLOutputter().print(anno, System.out);
            break;
          case JSON:
            new JSONOutputter().print(anno, System.out);
            System.out.println();
            break;
          case CONLL:
            new CoNLLOutputter().print(anno, System.out);
            System.out.println();
            break;
          case TEXT:
            new TextOutputter().print(anno, System.out);
            break;
          case SERIALIZED:
            warn("You probably cannot read the serialized output, so printing in text instead");
            new TextOutputter().print(anno, System.out);
            break;
          default:
            throw new IllegalArgumentException("Cannot output in format " + outputFormat + " from the interactive shell");
        }
      } catch (IOException e) {
        throw new RuntimeIOException(e);
      }
    }
  });
}
 
Example #9
Source File: KBPSemgrexExtractor.java    From InformationExtraction with GNU General Public License v3.0
public KBPSemgrexExtractor(String semgrexdir) throws IOException {
  logger.log("Creating SemgrexRegexExtractor");
  // Create extractors
  for (RelationType rel : RelationType.values()) {
    String filename = semgrexdir + File.separator + rel.canonicalName.replace("/", "SLASH") + ".rules";
    if (IOUtils.existsInClasspathOrFileSystem(filename)) {

      List<SemgrexPattern> rulesforrel = SemgrexBatchParser.compileStream(IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(filename));
      logger.log("Read " + rulesforrel.size() + " rules from " + filename + " for relation " + rel);
      rules.put(rel, rulesforrel);
    }
  }
}
 
Example #10
Source File: IntelKBPStatisticalExtractor.java    From InformationExtraction with GNU General Public License v3.0
public static void trainModel() throws IOException {
    forceTrack("Training data");
    List<Pair<KBPInput, String>> trainExamples = DatasetUtils.readDataset(TRAIN_FILE);
    log.info("Read " + trainExamples.size() + " examples");
    log.info("" + trainExamples.stream().map(Pair::second).filter(NO_RELATION::equals).count() + " are " + NO_RELATION);
    endTrack("Training data");

    // Featurize + create the dataset
    forceTrack("Creating dataset");
    RVFDataset<String, String> dataset = new RVFDataset<>();
    final AtomicInteger i = new AtomicInteger(0);
    long beginTime = System.currentTimeMillis();
    trainExamples.stream().parallel().forEach(example -> {
        if (i.incrementAndGet() % 1000 == 0) {
            log.info("[" + Redwood.formatTimeDifference(System.currentTimeMillis() - beginTime) +
                    "] Featurized " + i.get() + " / " + trainExamples.size() + " examples");
        }
        Counter<String> features = features(example.first);  // This takes a while per example
        synchronized (dataset) {
            dataset.add(new RVFDatum<>(features, example.second));
        }
    });
    trainExamples.clear();  // Free up some memory
    endTrack("Creating dataset");

    // Train the classifier
    log.info("Training classifier:");
    Classifier<String, String> classifier = trainMultinomialClassifier(dataset, FEATURE_THRESHOLD, SIGMA);
    dataset.clear();  // Free up some memory

    // Save the classifier
    IOUtils.writeObjectToFile(new IntelKBPStatisticalExtractor(classifier), MODEL_FILE);
}
 
Example #11
Source File: IntelKBPEnsembleExtractor.java    From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException, ClassNotFoundException {
    RedwoodConfiguration.standard().apply();  // Disable SLF4J crap.
    ArgumentParser.fillOptions(edu.stanford.nlp.ie.KBPEnsembleExtractor.class, args);

    Object object = IOUtils.readObjectFromURLOrClasspathOrFileSystem(STATISTICAL_MODEL);
    IntelKBPRelationExtractor statisticalExtractor;
    if (object instanceof LinearClassifier) {
        //noinspection unchecked
        statisticalExtractor = new IntelKBPStatisticalExtractor((Classifier<String, String>) object);
    } else if (object instanceof IntelKBPStatisticalExtractor) {
        statisticalExtractor = (IntelKBPStatisticalExtractor) object;
    } else {
        throw new ClassCastException(object.getClass() + " cannot be cast into a " + IntelKBPStatisticalExtractor.class);
    }
    logger.info("Read statistical model from " + STATISTICAL_MODEL);
    IntelKBPRelationExtractor extractor = new IntelKBPEnsembleExtractor(
            new IntelKBPTokensregexExtractor(TOKENSREGEX_DIR),
            new IntelKBPSemgrexExtractor(SEMGREX_DIR),
            statisticalExtractor
    );

    List<Pair<KBPInput, String>> testExamples = DatasetUtils.readDataset(TEST_FILE);

    extractor.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
        try {
            return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }));

}
 
Example #12
Source File: KBPModel.java    From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException {
    IOUtils.console("sentence> ", line -> {
        HashMap<RelationTriple, String> triple = extract(line);
        for (RelationTriple s : triple.keySet()) {
            System.out.println(s);
        }
    });
}
 
Example #13
Source File: IntelKBPSemgrexExtractor.java    From InformationExtraction with GNU General Public License v3.0
public IntelKBPSemgrexExtractor(String semgrexdir) throws IOException {
    logger.log("Creating SemgrexRegexExtractor");
    // Create extractors
    for (RelationType rel : RelationType.values()) {
        String filename = semgrexdir + File.separator + rel.canonicalName.replace("/", "SLASH") + ".rules";
        if (IOUtils.existsInClasspathOrFileSystem(filename)) {

            List<SemgrexPattern> rulesforrel = SemgrexBatchParser.compileStream(IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(filename));
            logger.log("Read " + rulesforrel.size() + " rules from " + filename + " for relation " + rel);
            rules.put(rel, rulesforrel);
        }
    }
}
 
Example #14
Source File: IntelKBPAnnotator.java    From InformationExtraction with GNU General Public License v3.0
/**
 * A debugging method to try relation extraction from the console.
 *
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
    props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    IOUtils.console("sentence> ", line -> {
        Annotation ann = new Annotation(line);
        pipeline.annotate(ann);
        for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
            sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(System.err::println);
        }
    });
}
 
Example #15
Source File: IntelKBPModel.java    From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException {
    IOUtils.console("sentence> ", line -> {
        HashMap<RelationTriple, String> triple = extract(line);
        for (RelationTriple s : triple.keySet()) {
            System.out.println(s);
        }
    });
}
 
Example #16
Source File: RegexNerTest.java    From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException {
    IOUtils.console("sentence> ", line -> {
        List<String> ners = extractNER(line);
        for (String ner : ners) {
            System.out.print(ner + ",");
        }
        System.out.println();
    });
}
 
Example #17
Source File: DocumentFrequencyCounter.java    From wiseowl with MIT License
/**
 * Computes a result, or throws an exception if unable to do so.
 *
 * @return computed result
 * @throws Exception if unable to compute a result
 */
public Counter<String> call() throws Exception {
  // We need to fabricate an overarching document tag, because the Gigaword
  // files don't have one.
  String fileContents = IOUtils.slurpFile(file);
  fileContents = "<docs>" + fileContents + "</docs>";

  return getIDFMapForFile(new StringReader(fileContents));
}
 
Example #18
Source File: CoreNlpTokenizer.java    From jstarcraft-nlp with Apache License 2.0
private boolean processInput() {
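    // Slurp the entire Reader into one String, run the full pipeline over it,
    // and expose the resulting sentences through an iterator.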
    Annotation annotation = new Annotation(IOUtils.slurpReader(input));
    pipeline.annotate(annotation);
    sentences = annotation.get(SentencesAnnotation.class).iterator();
    return true;
}