Java Code Examples for opennlp.tools.util.Span

The following are top-voted examples showing how to use opennlp.tools.util.Span. These examples are extracted from open-source projects. You can vote up the examples you like; your votes will be used by our system to surface more high-quality examples.
Example 1
Project: habot   File: IntentTrainer.java   View source code 7 votes vote down vote up
/**
 * Classifies the query into an intent and attaches any named entities found
 * by the configured name finders.
 */
public Intent interpret(String query) {
    String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(query);
    double[] scores = categorizer.categorize(tokens);
    logger.debug(categorizer.getAllResults(scores));

    Intent result = new Intent(categorizer.getBestCategory(scores));

    for (NameFinderME finder : nameFinderMEs) {
        Span[] entitySpans = finder.find(tokens);
        String[] entityTexts = Span.spansToStrings(entitySpans, tokens);
        for (int idx = 0; idx < entitySpans.length; idx++) {
            result.getEntities().put(entitySpans[idx].getType(), entityTexts[idx]);
        }
    }

    logger.debug(result.toString());

    return result;
}
 
Example 2
Project: wiseowl   File: SentenceTokenizer.java   View source code 6 votes vote down vote up
/**
 * Emits one sentence per call as a Lucene token.
 * Lazily splits the input into sentence spans on first use, then walks the
 * span array, copying each sentence's characters into the term attribute.
 *
 * @return true if a sentence token was produced, false when exhausted
 */
@Override
public final boolean incrementToken() throws IOException {
 if (sentences == null) {
     fillSentences();
   }
   
   // All sentences consumed — end of stream.
   if (tokenOffset >= sentences.length) {
     return false;
   }
   
   Span sentenceSpan = sentences[tokenOffset];
   clearAttributes();
   int start = sentenceSpan.getStart();
   int end   = sentenceSpan.getEnd();
   // Span end is exclusive, so (end - start) is the sentence length.
   termAtt.copyBuffer(inputSentence, start, end - start);
   posIncrAtt.setPositionIncrement(1);
   offsetAtt.setOffset(start, end);
   tokenOffset++;
   
   return true;
}
 
Example 3
Project: sirocco   File: PositionsDictionary.java   View source code 6 votes vote down vote up
/**
 * Records the span's boundary positions in this dictionary under the given
 * labels. A zero-length span (start == end) is stored as a single labelled
 * position that is both a start and an end; otherwise the start and end
 * positions are registered separately.
 */
public void addSpan(Span span, CSList<Character> labels) throws Exception {
    boolean pointSpan = span.getStart() == span.getEnd();
    if (!pointSpan) {
        addStart(span.getStart(), labels);
        addEnd(span.getEnd(), labels);
        return;
    }

    LabelledPosition lpos = this.get(span.getStart());
    if (lpos != null) {
        // Position already known from another span, so it is no longer unique.
        lpos.IsSingleSpan = false;
    } else {
        lpos = new LabelledPosition();
        this.put(span.getStart(), lpos);
        lpos.IsSingleSpan = true;
    }
    lpos.IsStart = true;
    lpos.IsEnd = true;
    LabelledPosition.addNewLabels(lpos.StartLabels, labels);
    LabelledPosition.addNewLabels(lpos.EndLabels, labels);
}
 
Example 4
Project: sirocco   File: EnglishIndexer.java   View source code 6 votes vote down vote up
/**
 * Tokenizes and POS-tags the tag text, wraps each token in a Parse node, and
 * asks isHighValueObject whether the phrase qualifies as a topic.
 *
 * @param tag the candidate topic text
 * @return true if the phrase is judged good as a topic
 */
public Boolean isGoodAsTopic(String tag) throws Exception {
    String[] tokens = tokenizeSentence(tag);
    String[] postags = posTagTokens(tokens);
    CSList<Parse> phrase = new CSList<Parse>();
    for (int idx = 0;idx < tokens.length;idx++)
    {
        // NOTE(review): Span end here is length()-1 (inclusive-looking), while
        // other examples use length() for char spans — confirm intended.
        Parse parse = new Parse(tokens[idx], new Span(0,tokens[idx].length() - 1), postags[idx], 1.0, 1);
        phrase.add(parse);
    }
    Boolean goodAsTopic;
    Boolean goodAsTag;
    // refVar0 receives the "good as tag" verdict, refVar1 "good as topic".
    RefSupport<Boolean> refVar0 = new RefSupport<Boolean>();
    RefSupport<Boolean> refVar1 = new RefSupport<Boolean>();
    isHighValueObject(phrase, refVar0, refVar1);
    goodAsTag = refVar0.getValue();   // NOTE(review): assigned but never used here
    goodAsTopic = refVar1.getValue();
    return goodAsTopic;
}
 
Example 5
Project: sirocco   File: EnglishIndexer.java   View source code 6 votes vote down vote up
/**
 * Populates per-sentence sentiment vectors, indexed sentence strings, and
 * span maps for every paragraph of the given content index.
 *
 * @param contentindex the content index whose paragraphs are processed in place
 */
public void getSentiment(ContentIndex contentindex) throws Exception {
    for (int i = 0;i < contentindex.ParagraphIndexes.length;i++)
    {
        ParagraphIndex pindex = contentindex.ParagraphIndexes[i];
        // Allocate the per-sentence output arrays up front.
        pindex.SentenceSentiments = new FloatVector[pindex.SentenceCount];
        pindex.IndexedSentences = new String[pindex.SentenceCount];
        // Unchecked generic-array creation is unavoidable here; HashMap[] is cast.
        pindex.SpanMap = (HashMap<String,Span>[]) new HashMap[pindex.SentenceCount];
        for (int j = 0;j < pindex.SentenceCount;j++)
        {
            // if we do chunking instead of parsing, then use Shallow Accumulation Breaks
            RefSupport<FloatVector> refVar5 = new RefSupport<FloatVector>();
            getSentimentVector(pindex.SentenceParses[j],
            		pindex.SentenceFlags[j],contentindex.ContentParseDepth,refVar5);
            pindex.SentenceSentiments[j] = refVar5.getValue();
            // Second pass builds the indexed sentence text and its span map
            // from the sentiment computed above.
            RefSupport<String> refVar6 = new RefSupport<String>();
            RefSupport<HashMap<String,Span>> refVar7 = new RefSupport<HashMap<String,Span>>();
            makeIndexedSentence(pindex.SentenceParses[j],
            		pindex.SentenceFlags[j],pindex.SentenceSentiments[j],refVar6,refVar7);
            pindex.IndexedSentences[j] = refVar6.getValue();
            pindex.SpanMap[j] = refVar7.getValue();
        }
    }
}
 
Example 6
Project: bioasq   File: OpenNlpChunkerConceptProvider.java   View source code 6 votes vote down vote up
/**
 * Finds concept mentions by sliding a window of chunk spans over the sentence
 * and keeping windows whose chunk-type sequence equals the configured
 * {@code type} pattern.
 *
 * @param jcas the CAS holding the tokens
 * @return the concepts created for each matching chunk-type window
 */
@Override
public List<Concept> getConcepts(JCas jcas) throws AnalysisEngineProcessException {
  List<Token> tokens = TypeUtil.getOrderedTokens(jcas);
  String[] texts = tokens.stream().map(Token::getCoveredText).toArray(String[]::new);
  String[] pos = tokens.stream().map(Token::getPartOfSpeech).toArray(String[]::new);
  // insertOutsideSpans fills gaps so span indices line up with token indices.
  List<Span> spans = insertOutsideSpans(chunker.chunkAsSpans(texts, pos));
  return IntStream.rangeClosed(0, spans.size() - type.size())
          // Each window of type.size() consecutive spans...
          .mapToObj(i -> spans.subList(i, i + type.size()))
          // ...must match the configured chunk-type sequence exactly.
          .filter(spansSublist -> type
                  .equals(spansSublist.stream().map(Span::getType).collect(toList())))
          // Map the window back to the covered token sublist (token indices).
          .map(spansSublist -> tokens.subList(spansSublist.get(0).getStart(),
                  spansSublist.get(spansSublist.size() - 1).getEnd()))
          .filter(toks -> toks.size() >= minLength)
          .map(toks -> TypeFactory.createConceptMention(jcas, getFirstTokenBegin(toks),
                  getLastTokenEnd(toks)))
          .map(cmention -> TypeFactory.createConcept(jcas, cmention,
                  TypeFactory.createConceptType(jcas, "opennlp:" + String.join("-", type))))
          .collect(toList());
}
 
Example 7
Project: opennlp-addons   File: GeoHashBinningScorer.java   View source code 6 votes vote down vote up
/**
 * Scores linked spans by geohash-clustering all of their gazetteer entries.
 *
 * NOTE(review): docText, sentenceSpans, properties and additionalContext are
 * required by the interface but unused in this implementation.
 */
@Override
public void score(List<LinkedSpan> linkedSpans, String docText, Span[] sentenceSpans, EntityLinkerProperties properties,  AdminBoundaryContext additionalContext) {
   //Map<Double, Double> latLongs = new HashMap<Double, Double>();
  List<GazetteerEntry> allGazEntries = new ArrayList<>();

  /*
   * Collect every gazetteer entry referenced by the linked spans.
   */
  for (LinkedSpan<BaseLink> ls : linkedSpans) {
    for (BaseLink bl : ls.getLinkedEntries()) {
      if (bl instanceof GazetteerEntry) {
        allGazEntries.add((GazetteerEntry) bl);
      }
    }
  }
  /*
   * Use point clustering to score each hit; scoreClusters mutates the
   * entries' scores as a side effect.
   */
  Map<String, List<GazetteerEntry>> cluster = CLUSTERER.cluster(allGazEntries, PRECISION);
  CLUSTERER.scoreClusters(cluster);

}
 
Example 8
Project: baleen-extras   File: OpenNLPParser.java   View source code 6 votes vote down vote up
/**
 * Recursively walks the parse tree, adding a PhraseChunk annotation for every
 * node whose type is a recognised phrase type.
 */
private void addParsedAsAnnotations(final JCas jCas, final int offset, final Parse parsed) {
	final String nodeType = parsed.getType();

	// Only phrase-type nodes become chunks; others are just descended into.
	if (OpenNLPParser.PHRASE_TYPES.contains(nodeType)) {
		final Span nodeSpan = parsed.getSpan();
		final PhraseChunk chunk = new PhraseChunk(jCas);
		chunk.setBegin(offset + nodeSpan.getStart());
		chunk.setEnd(offset + nodeSpan.getEnd());
		chunk.setChunkType(parsed.getType());

		addToJCasIndex(chunk);
	}

	for (final Parse child : parsed.getChildren()) {
		addParsedAsAnnotations(jCas, offset, child);
	}
}
 
Example 9
Project: baleen-extras   File: OpenNLPParser.java   View source code 6 votes vote down vote up
/**
 * Builds an OpenNLP parse for the sentence, seeding it with one token node
 * per WordToken (spans are sentence-relative), then runs the parser.
 *
 * @param sentence the sentence annotation to parse
 * @param tokens   the word tokens covering the sentence
 * @return the parsed tree returned by the parser
 */
private Parse parseSentence(final Sentence sentence, final Collection<WordToken> tokens) {
	final String text = sentence.getCoveredText();

	// Root node covers the whole sentence text.
	final Parse parse = new Parse(text, new Span(0, text.length()), AbstractBottomUpParser.INC_NODE, 1, 0);

	// Add in the POS: one TOK_NODE per token, offset to sentence-local coords.
	int index = 0;
	for (final WordToken token : tokens) {
		final Span span = new Span(token.getBegin() - sentence.getBegin(), token.getEnd() - sentence.getBegin());

		parse.insert(new Parse(text, span, AbstractBottomUpParser.TOK_NODE, 0, index));
		index++;
	}

	// Parse the sentence
	return parser.parse(parse);
}
 
Example 10
Project: opennlp-enhancer   File: SpanUtil.java   View source code 6 votes vote down vote up
/**
 * Sorts the spans, removes exact duplicates (adjacent equal spans after
 * sorting), and returns the result of {@code getTaggedSpans}.
 *
 * @param spans the spans
 * @return the deduplicated tagged spans
 */
public static Span[] dropOverlappingSpans(Span spans[]) {

	List<Span> sorted = new ArrayList<Span>(spans.length);
	Collections.addAll(sorted, spans);
	Collections.sort(sorted);

	Span previous = null;
	for (Iterator<Span> it = sorted.iterator(); it.hasNext();) {
		Span current = it.next();
		if (current.equals(previous)) {
			it.remove();
		}
		previous = current;
	}
	return getTaggedSpans(sorted);

}
 
Example 11
Project: opennlp-enhancer   File: SpanUtil.java   View source code 6 votes vote down vote up
/**
 * Collapses runs of mutually intersecting spans to their first span.
 *
 * @param spans the sorted span list; intersecting followers are removed in place
 * @return the surviving spans as an array
 */
private static Span[] singleTagging(List<Span> spans) {

	Iterator<Span> it = spans.iterator();
	Span lastSpan = null;
	while (it.hasNext()) {
		Span span = it.next();
		if (lastSpan != null) {
			if (lastSpan.intersects(span)) {
				it.remove();
				// Keep the earlier span as the comparison baseline so an
				// entire run of overlapping spans collapses to the first one.
				span = lastSpan;
			}
		}
		lastSpan = span;
	}
	return spans.toArray(new Span[spans.size()]);
}
 
Example 12
Project: opennlp-enhancer   File: SpanUtil.java   View source code 6 votes vote down vote up
/**
 * Converts each span of the token span into a NamedEntity whose text is the
 * covered tokens and whose types come from splitting the span type on '|'.
 *
 * @param tokenSpan the token span
 * @return one NamedEntity per span
 */
public static NamedEntity[] getNamedEntity(TokenSpan tokenSpan) {
	final Span[] spans = tokenSpan.getSpans();
	final String[] tokens = tokenSpan.getTokens();
	final String[] texts = Span.spansToStrings(spans, tokens);

	final NamedEntity[] result = new NamedEntity[spans.length];
	int i = 0;
	for (final Span span : spans) {
		final NamedEntity entity = new NamedEntity();
		entity.setEntity(texts[i]);
		entity.setType(span.getType().split("\\|"));
		result[i] = entity;
		i++;
	}

	return result;
}
 
Example 13
Project: elasticsearch-ingest-opennlp   File: OpenNlpService.java   View source code 6 votes vote down vote up
/**
 * Runs the configured NER model for the given field over the content and
 * returns the set of matched entity strings.
 *
 * @param content the text to analyze
 * @param field   key of the name-finder model to use
 * @return the distinct entity strings found
 * @throws ElasticsearchException if no model is registered for the field
 */
public Set<String> find(String content, String field) {
    try {
        if (!nameFinderModels.containsKey(field)) {
            throw new ElasticsearchException("Could not find field [{}], possible values {}", field, nameFinderModels.keySet());
        }
        TokenNameFinderModel finderModel= nameFinderModels.get(field);
        // NOTE(review): the finally block removes the thread-local on every
        // call, so this equals-check caching never observes a previous value —
        // confirm whether the remove() or the caching is the intended behavior.
        if (threadLocal.get() == null || !threadLocal.get().equals(finderModel)) {
            threadLocal.set(finderModel);
        }

        String[] tokens = SimpleTokenizer.INSTANCE.tokenize(content);
        Span spans[] = new NameFinderME(finderModel).find(tokens);
        String[] names = Span.spansToStrings(spans, tokens);
        return Sets.newHashSet(names);
    } finally {
        threadLocal.remove();
    }
}
 
Example 14
Project: Mutters   File: TestNER.java   View source code 6 votes vote down vote up
/** Verifies the person NER model extracts both person names from the sample sentence. */
@Test
public void testPersonNER()
  throws Exception
{
  URL personModelUrl = Thread.currentThread().getContextClassLoader()
      .getResource("models/en-ner-persons.bin");
  assertThat(personModelUrl, is(notNullValue()));

  TokenNameFinderModel personModel = new TokenNameFinderModel(personModelUrl);
  assertThat(personModel, is(notNullValue()));

  NameFinderME finder = new NameFinderME(personModel);
  String[] sentenceTokens = SimpleTokenizer.INSTANCE
      .tokenize("Mr. John Smith of New York, married Anne Green of London today.");
  assertThat(sentenceTokens.length, is(15));

  Span[] personSpans = finder.find(sentenceTokens);
  assertThat(personSpans.length, is(2));

  String[] personNames = Span.spansToStrings(personSpans, sentenceTokens);
  assertThat(personNames.length, is(2));
  assertThat(personNames[0], is("John Smith"));
  assertThat(personNames[1], is("Anne Green"));
}
 
Example 15
Project: Mutters   File: TestNER.java   View source code 6 votes vote down vote up
/** Verifies the location NER model extracts both place names from the sample sentence. */
@Test
public void testLocationNER()
  throws Exception
{
  URL locationModelUrl = Thread.currentThread().getContextClassLoader()
      .getResource("models/en-ner-locations.bin");
  assertThat(locationModelUrl, is(notNullValue()));

  TokenNameFinderModel locationModel = new TokenNameFinderModel(locationModelUrl);
  assertThat(locationModel, is(notNullValue()));

  NameFinderME finder = new NameFinderME(locationModel);
  String[] sentenceTokens = SimpleTokenizer.INSTANCE
      .tokenize("Mr. John Smith of New York, married Anne Green of London today.");
  assertThat(sentenceTokens.length, is(15));

  Span[] locationSpans = finder.find(sentenceTokens);
  assertThat(locationSpans.length, is(2));

  String[] locationNames = Span.spansToStrings(locationSpans, sentenceTokens);
  assertThat(locationNames.length, is(2));
  assertThat(locationNames[0], is("New York"));
  assertThat(locationNames[1], is("London"));
}
 
Example 16
Project: Mutters   File: TestNER.java   View source code 6 votes vote down vote up
/** Verifies the date NER model extracts the single date expression. */
@Test
public void testDateNER()
  throws Exception
{
  URL modelUrl = Thread.currentThread().getContextClassLoader()
      .getResource("models/en-ner-dates.bin");
  assertThat(modelUrl, is(notNullValue()));

  TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
  assertThat(model, is(notNullValue()));

  NameFinderME nameFinder = new NameFinderME(model);
  String[] tokens = SimpleTokenizer.INSTANCE
      .tokenize("Mr. John Smith of New York, married Anne Green of London today.");
  assertThat(tokens.length, is(15));

  Span[] spans = nameFinder.find(tokens);
  assertThat(spans.length, is(1));

  // Renamed from 'locations' (copy-paste from the location test): these are dates.
  String[] dates = Span.spansToStrings(spans, tokens);
  assertThat(dates.length, is(1));
  assertThat(dates[0], is("today"));
}
 
Example 17
Project: Mutters   File: TestNER.java   View source code 6 votes vote down vote up
/** Verifies the address NER model extracts the street address. */
@Test
public void testAddressNER()
  throws Exception
{
  URL modelUrl = Thread.currentThread().getContextClassLoader()
      .getResource("models/en-ner-address.bin");
  assertThat(modelUrl, is(notNullValue()));

  TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
  assertThat(model, is(notNullValue()));

  NameFinderME nameFinder = new NameFinderME(model);
  String[] tokens = SimpleTokenizer.INSTANCE.tokenize("Send a taxi to 12 Pleasent Street");
  Span[] spans = nameFinder.find(tokens);
  assertThat(spans.length, is(1));

  // Renamed from 'locations' (copy-paste from the location test): these are addresses.
  String[] addresses = Span.spansToStrings(spans, tokens);
  assertThat(addresses.length, is(1));
  assertThat(addresses[0], is("12 Pleasent Street"));
}
 
Example 18
Project: AgePredictor   File: AgeClassifyFineGrainedReportListener.java   View source code 6 votes vote down vote up
/**
 * Updates the per-tag F-measure accumulators by converting each tag's
 * occurrences in the reference and prediction arrays into single-token spans.
 *
 * NOTE(review): assumes refs and preds have the same length — preds[i] is
 * indexed by the refs loop; confirm callers guarantee this.
 *
 * @param refs  gold tags, one per token
 * @param preds predicted tags, one per token
 */
private void updateTagFMeasure(String[] refs, String[] preds) {
    // create a set with all tags
    Set<String> tags = new HashSet<String>(Arrays.asList(refs));
    tags.addAll(Arrays.asList(preds));

    // create samples for each tag
    for (String tag : tags) {
	List<Span> reference = new ArrayList<Span>();
	List<Span> prediction = new ArrayList<Span>();
	for (int i = 0; i < refs.length; i++) {
	    if (refs[i].equals(tag)) {
		reference.add(new Span(i, i + 1));
	    }
	    if (preds[i].equals(tag)) {
		prediction.add(new Span(i, i + 1));
	    }
	}
	if (!this.tagFMeasure.containsKey(tag)) {
	    this.tagFMeasure.put(tag, new FMeasure());
	}
	// populate the fmeasure
	this.tagFMeasure.get(tag).updateScores(
					       reference.toArray(new Span[reference.size()]),
					       prediction.toArray(new Span[prediction.size()]));
    }
}
 
Example 19
Project: elasticsearch-analysis-opennlp   File: NamedEntityRecognitionOperation.java   View source code 6 votes vote down vote up
/**
 * Tags each word with the type of the NER span covering it.
 *
 * @param words the tokenized input
 * @return an array parallel to {@code words}; positions covered by a span
 *         hold that span's type, all other positions remain {@code null}
 */
public String[] createAll(String[] words) {
    Span[] nerSpans;
    synchronized(nameFinder) {
        nerSpans = nameFinder.find(words);
        nameFinder.clearAdaptiveData();
    }
    String[] nerTags = new String[words.length];
    if (nerSpans.length == 0) {
        return nerTags;
    }
    // BUG FIX: previously every token was tagged with nerSpans[0].getType(),
    // so later spans of a different entity type were mislabeled. Use each
    // span's own type instead.
    for (Span tagged : nerSpans) {
        String tag = tagged.getType();
        for (int j = tagged.getStart(); j < tagged.getEnd(); j++) {
            nerTags[j] = tag;
        }
    }
    return nerTags;
}
 
Example 20
Project: story-inspector   File: OpenNLPStoryParser.java   View source code 6 votes vote down vote up
/**
 * Generate a list of tokens for the specified sentence.
 *
 * @param sentenceRange
 *            The {@link TextRange} of the sentence to tokenize
 * @param paragraph
 *            The {@link ExtractedParagraph} the sentence is within
 * @param paragraphStartCharIndex
 *            The story character index of the beginning of the paragraph. Note that only paragraphs of {@link ParagraphType.TEXT} are counted
 *            when considering the overall story character index.
 * @return the tokens covering the sentence, including intra-sentence whitespace
 */
private List<TokenImpl> generateTokensFromSentence(final TextRange sentenceRange, final ExtractedParagraph paragraph,
		final int paragraphStartCharIndex) {
	// Get sentence string and tokenize it (ranges are story-global, so shift
	// by the paragraph's start index to get paragraph-local offsets)
	final String sentence = paragraph.getText().substring(sentenceRange.getStartIndex() - paragraphStartCharIndex,
			sentenceRange.getEndIndex() - paragraphStartCharIndex);
	// NOTE(review): sentence is already a String; the toString() call is redundant.
	final Span[] tokenSpans = this.tokenizer.tokenizePos(sentence.toString());
	final String[] tokenTexts = Span.spansToStrings(tokenSpans, sentence);

	// Do part of speech tagging on token strings
	final String[] tokenPosTags = this.posTagger.tag(tokenTexts);

	// Create token objects
	// Generate the ranges the tokens cover. This will make the tokens cover any white space within the sentence as well
	final List<TextRange> ranges = generateTextRangesFromSpans(tokenSpans, sentenceRange);
	final List<TokenImpl> tokens = new ArrayList<>(ranges.size());
	for (int i = 0; i < ranges.size(); ++i) {
		tokens.add(constructToken(ranges.get(i), tokenTexts[i], tokenPosTags[i], paragraph, paragraphStartCharIndex));
	}
	return tokens;
}
 
Example 21
Project: story-inspector   File: OpenNLPStoryParser.java   View source code 6 votes vote down vote up
/**
 * Given an array of {@link Span}s localized to a text range, expand the spans as necessary to fully cover the range and convert to a list of
 * {@link TextRange}s.
 *
 * Do this by expanding the beginning of each span if necessary and finally, the end of the last span if necessary.
 *
 * @param spans
 *            The spans to expand (may be empty, in which case an empty list is returned)
 * @param range
 *            The range to cover
 * @return The list of {@link TextRange}s that cover the range.
 */
private List<TextRange> generateTextRangesFromSpans(final Span[] spans, final TextRange range) {
	int currentCharIndex = range.getStartIndex();
	final int endingCharIndex = range.getEndIndex();

	final List<TextRange> ranges = new ArrayList<>(spans.length);

	// Guard: with no spans there is nothing to cover, and the last-range
	// fix-up below would otherwise throw IndexOutOfBoundsException on get(-1).
	if (spans.length == 0) {
		return ranges;
	}

	for (final Span tokenSpan : spans) {
		// Get the global position of the end of the current span
		final int rangeEnd = range.getStartIndex() + tokenSpan.getEnd();
		// Create a new range from the point we have covered so far to the end of the current span, might expand the beginning of the current span
		ranges.add(new TextRange(currentCharIndex, rangeEnd));
		currentCharIndex = rangeEnd;
	}

	// Extend end of last range if necessary to reach the end of the range we must cover
	final TextRange lastRange = ranges.get(ranges.size() - 1);
	if (lastRange.getEndIndex() < endingCharIndex) {
		ranges.set(ranges.size() - 1, new TextRange(lastRange.getStartIndex(), endingCharIndex));
	}

	return ranges;
}
 
Example 22
Project: GeoParsingNSF   File: NameEntityExtractor.java   View source code 6 votes vote down vote up
/**
 * Runs the NER model over the whitespace-split input and adds each found
 * entity (trimmed) to {@code locationNameEntities}.
 *
 * Fixes: the model stream is now closed via try-with-resources (previously
 * leaked if TokenNameFinderModel's constructor threw), and the entity strings
 * are taken directly from {@code Span.spansToStrings} instead of round-tripping
 * through {@code Arrays.toString} + {@code split(",")}, which corrupted any
 * entity containing a comma.
 *
 * @param stream UTF-8 text to scan for entities
 */
public void getAllNameEntitiesfromInput(InputStream stream)
		throws InvalidFormatException, IOException {

	String[] in = IOUtils.toString(stream, "UTF-8").split(" ");

	try (InputStream modelIn = new FileInputStream(nerModelPath)) {
		TokenNameFinderModel model = new TokenNameFinderModel(modelIn);
		NameFinderME nameFinder = new NameFinderME(model);

		Span nameE[] = nameFinder.find(in);

		for (String name : Span.spansToStrings(nameE, in)) {
			this.locationNameEntities.add(name.trim());
		}
	}
}
 
Example 23
Project: spimedb   File: NLP.java   View source code 6 votes vote down vote up
/**
 * Runs every loaded name finder over the tokenized input.
 *
 * @param input the text to analyze
 * @return a map from entity type (of each finder's first hit) to all matched
 *         strings for that finder, or {@code null} if nothing matched
 */
@Nullable public static Map<String,String[]> names(String input) {

        NameFinderME[] finders = (NameFinderME[]) models.get(TokenNameFinderModel.class);
        String[] tokens = tokenizer().tokenize(input);

        // Was a raw 'new HashMap(...)', which defeats generic type checking.
        Map<String,String[]> x = new HashMap<>(finders.length);
        for (NameFinderME m : finders) {
            Span[] ss = m.find(tokens);
            if (ss.length > 0)
                x.put(ss[0].getType(), Span.spansToStrings(ss, tokens));
        }

        if (!x.isEmpty()) {
            return x;
        } else {
            return null;
        }
    }
 
Example 24
Project: baleen   File: OpenNLP.java   View source code 6 votes vote down vote up
/**
 * Use the OpenNLP sentence detector to find sentences in the text block and
 * add them to the JCas index.
 */
private List<Sentence> createBaseSentences(TextBlock block)
		throws AnalysisEngineProcessException {
	final List<Sentence> result = new ArrayList<>();

	try {
		final String coveredText = block.getCoveredText();
		for (final Span detected : sentenceDetector.sentPosDetect(coveredText)) {
			final Sentence annotation =
					block.newAnnotation(Sentence.class, detected.getStart(), detected.getEnd());

			addToJCasIndex(annotation);
			result.add(annotation);
		}
	} catch (final Exception e) {
		throw new AnalysisEngineProcessException(e);
	}

	return result;
}
 
Example 25
Project: baleen   File: OpenNLPParser.java   View source code 6 votes vote down vote up
/**
 * Recursively converts phrase-type parse nodes into PhraseChunk annotations,
 * offsetting span positions into document coordinates.
 *
 * @param jCas
 *            the CAS to add annotations to
 * @param offset
 *            document offset of the parsed text's start
 * @param parsed
 *            the parse node to convert (children are processed recursively)
 */
private void addParsedAsAnnotations(final JCas jCas, final int offset, final Parse parsed) {
	final String type = parsed.getType();

	// Ignore non phrase types
	if (OpenNLPParser.PHRASE_TYPES.contains(type)) {
		// Otherwise add new ParseChunks

		final Span span = parsed.getSpan();
		final PhraseChunk phraseChunk = new PhraseChunk(jCas);
		phraseChunk.setBegin(offset + span.getStart());
		phraseChunk.setEnd(offset + span.getEnd());
		phraseChunk.setChunkType(parsed.getType());

		addToJCasIndex(phraseChunk);
	}

	Arrays.stream(parsed.getChildren()).forEach(p -> addParsedAsAnnotations(jCas, offset, p));

}
 
Example 26
Project: baleen   File: OpenNLPParser.java   View source code 6 votes vote down vote up
/**
 * Builds an OpenNLP parse for the sentence, seeding it with one token node
 * per WordToken (spans converted to sentence-relative offsets), then runs
 * the parser.
 *
 * @param sentence
 *            the sentence annotation to parse
 * @param tokens
 *            the word tokens covering the sentence
 * @return the parse tree produced by the parser
 */
private Parse parseSentence(final Sentence sentence, final Collection<WordToken> tokens) {
	final String text = sentence.getCoveredText();

	// Root node spans the entire sentence text.
	final Parse parse = new Parse(text, new Span(0, text.length()), AbstractBottomUpParser.INC_NODE, 1, 0);

	// Add in the POS: one TOK_NODE per token, shifted to sentence-local coords.
	int index = 0;
	for (final WordToken token : tokens) {
		final Span span = new Span(token.getBegin() - sentence.getBegin(), token.getEnd() - sentence.getBegin());

		parse.insert(new Parse(text, span, AbstractBottomUpParser.TOK_NODE, 0, index));
		index++;
	}

	// Parse the sentence
	return parser.parse(parse);
}
 
Example 27
Project: schemas   File: BasicEventAnalyzer.java   View source code 6 votes vote down vote up
/**
 * @return True if the subtree's character range fully contains the mention
 * context's span; partial overlaps return false.
 */
private boolean containsContext(Tree full, Tree subtree, MentionContext mc) {
  // Character offset of the subtree within the full tree.
  int offset = TreeOperator.inorderTraverse(full, subtree);
  Span span = mc.getSpan();

  // Span starts before the subtree: cannot be contained.
  if (span.getStart() < offset) {
    return false;
  }

  // Contained iff the span also ends within the subtree's character extent.
  int length = TreeOperator.treeStringLength(subtree);
  return span.getEnd() <= (offset + length);
}
 
Example 28
Project: java_in_examples   File: OpenNLPSentenceDetectionTest.java   View source code 6 votes vote down vote up
/** Demonstrates OpenNLP sentence detection: prints each sentence, then the raw spans. */
public static void main(String[] strings) throws Exception {
    String text = "“But I don’t want to go among mad people,” Alice remarked. " +
            "“Oh, you can’t help that,” said the Cat: “we’re all mad here. I’m mad. You’re mad.” " +
            "“How do you know I’m mad?” said Alice. " +
            "“You must be,” said the Cat, “or you wouldn’t have come here.”";

    try (InputStream modelIn = new FileInputStream(NATURAL_LANGUAGE_PROCESSING_SRC_MAIN_RESOURCES_EN_SENT_BIN)) {
        SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelIn));
        String[] detectedSentences = detector.sentDetect(text);
        Span[] detectedSpans = detector.sentPosDetect(text);
        for (String sentence : detectedSentences) {
            System.out.println(sentence);
        }
        System.out.println(Arrays.deepToString(detectedSpans));
    }
}
 
Example 29
Project: diskoveror-ta   File: OpenNLP.java   View source code 6 votes vote down vote up
/**
 * Extracts entities of the given category from the text.
 *
 * @param entityCat the entity category selecting the NER model
 * @param text      the text to scan
 * @return the entity strings, each built from its span's tokens joined by spaces
 */
@Override
public List<String> getEntities(EntityType entityCat, String text)
{
    NameFinderME temp = getNameFinderModel(entityCat);
    List<String> entityList = new ArrayList<String>();
    String [] tokens=null;
    tokens = tokenizer.tokenize(text);

    Span nameSpans[] = temp.find(tokens);

    for(Span s: nameSpans)
    {
        // Build with a separator prefix instead of appending a trailing space
        // and deleteCharAt: the old code threw StringIndexOutOfBoundsException
        // on a zero-length span (empty builder, deleteCharAt(-1)).
        StringBuilder sb = new StringBuilder();
        for(int i=s.getStart();i<s.getEnd();i++){
            if (sb.length() > 0) {
                sb.append(' ');
            }
            sb.append(tokens[i]);
        }
        entityList.add(sb.toString());
    }
    return entityList;
}
 
Example 30
Project: openimaj   File: OpenNLPPersonAnnotator.java   View source code 6 votes vote down vote up
/**
 * Runs the name finder over each sentence's tokens and annotates the tokens
 * covered by each found span.
 *
 * NOTE(review): each covered token receives the RawTextAnnotation itself; the
 * commented-out lines suggest a NamedEntityAnnotation was intended — confirm.
 */
@Override
	void performAnnotation(RawTextAnnotation annotation)
			throws MissingRequiredAnnotationException {


			  for (SentenceAnnotation sentence : annotation.getAnnotationsFor(SentenceAnnotation.class)) {
				  List<TokenAnnotation> atoks = sentence.getAnnotationsFor(TokenAnnotation.class);
				  List<String> toks = AnnotationUtils.getStringTokensFromTokenAnnotationList(atoks);
			    Span nameSpans[] = nameFinder.find(AnnotationUtils.ListToArray(toks));
			    for(Span s :nameSpans){
//			    	NamedEntityAnnotation nea = new NamedEntityAnnotation();
//			    	NamedEntity ne = new NamedEntity();
			    	// Annotate every token covered by this span.
			    	for(int i = s.getStart();i<s.getEnd();i++){
			    		atoks.get(i).addAnnotation(annotation);
			    	}
			    }
			  }
			  // Reset the finder's document-level adaptive state between documents.
			  nameFinder.clearAdaptiveData();

	}
 
Example 31
Project: DTAEngine   File: OpenNLP.java   View source code 6 votes vote down vote up
/**
 * Extracts entities of the given category from the text, returning each
 * entity as its span's tokens joined by single spaces.
 */
@Override
public List<String> getEntities(EntityType entityCat, String text)
{
    NameFinderME finder = getNameFinderModel(entityCat);
    List<String> entities = new ArrayList<String>();
    String[] tokens = tokenizer.tokenize(text);

    Span[] entitySpans = finder.find(tokens);

    for (Span entitySpan : entitySpans)
    {
        // Append each token with a trailing space, then drop the last space.
        StringBuilder joined = new StringBuilder();
        for (int idx = entitySpan.getStart(); idx < entitySpan.getEnd(); idx++) {
            joined.append(tokens[idx]).append(" ");
        }
        entities.add(joined.substring(0, joined.length() - 1));
    }
    return entities;
}
 
Example 32
Project: knowledge-extraction   File: CorefParse.java   View source code 6 votes vote down vote up
/**
 * Recursively pretty-prints the parse tree with two-space indentation per
 * depth level, appending "#id" for nodes present in parseMap.
 *
 * @param p    the parse node to print
 * @param deep the current depth (indent = deep * 2 spaces)
 */
private void print(Parse p, int deep) {
	// Skip token nodes (type prefix matches the parser's token-node marker).
	if (p.getType().length() > 1 && p.getType().substring(0, 2).equals(Parser.TOK_NODE))
		return;
	
	char[] spaces = new char[deep*2];
	Arrays.fill(spaces, ' ');
	Span span = p.getSpan();
    System.out.print(new String(spaces) + p.getType() + " -- " + p.getText().substring(span.getStart(),
			span.getEnd()));
    if (parseMap.containsKey(p)) {
		System.out.print("#" + parseMap.get(p));
	}
    System.out.print("\n");
    for (Parse child : p.getChildren()) {
    	// Was 'new Integer(deep + 1)': the deprecated boxing constructor was
    	// immediately unboxed back to int by the call — pass the int directly.
    	print(child, deep + 1);
    }
}
 
Example 33
Project: karma-information-extraction   File: OpenNLPExtraction.java   View source code 6 votes vote down vote up
/**
 * Extracts person names from the paragraph text, deduplicated via a set.
 */
public List<Extraction> findPeople(InputExtraction paragraph) {
	Span personSpans[] = null;
	Set<Extraction> people = new HashSet<Extraction>();

	String[] words = paragraph.getText().split("\\s");
	personSpans = nameDetector.find(words);

	for (String name : Span.spansToStrings(personSpans, words)) {
		Extraction extraction = new Extraction();
		extraction.setExtraction(name);
		people.add(extraction);
	}

	return new ArrayList<Extraction>(people);
}
 
Example 34
Project: karma-information-extraction   File: OpenNLPExtraction.java   View source code 6 votes vote down vote up
/**
 * Extracts place names from the paragraph text, deduplicated via a set.
 */
public List<Extraction> findPlaces(InputExtraction paragraph) {
	Span placeSpans[] = null;
	Set<Extraction> places = new HashSet<Extraction>();

	String[] words = paragraph.getText().split("\\s");
	placeSpans = locationDetector.find(words);

	for (String name : Span.spansToStrings(placeSpans, words)) {
		Extraction extraction = new Extraction();
		extraction.setExtraction(name);
		places.add(extraction);
	}

	return new ArrayList<Extraction>(places);
}
 
Example 35
Project: karma-information-extraction   File: OpenNLPExtraction.java   View source code 6 votes vote down vote up
/**
 * Extracts date expressions from the paragraph text, deduplicated via a set.
 */
public List<Extraction> findDates(InputExtraction paragraph) {
	Span dateSpans[] = null;
	Set<Extraction> dates = new HashSet<Extraction>();

	String[] words = paragraph.getText().split("\\s");
	dateSpans = dateDetector.find(words);

	for (String name : Span.spansToStrings(dateSpans, words)) {
		Extraction extraction = new Extraction();
		extraction.setExtraction(name);
		dates.add(extraction);
	}

	return new ArrayList<Extraction>(dates);
}
 
Example 36
Project: jate   File: OpenNLPNounPhraseFilter.java   View source code 6 votes vote down vote up
/**
 * Converts the chunker's start/continue tag sequence into "NP" spans over
 * token indices. A chunk opens at a start tag, grows through continue tags,
 * and closes at the next non-continue tag (or end of input).
 */
private Span[] createSpan(String[] tags) {
    List<Span> chunks = new ArrayList<>();
    int chunkStart = -1;
    for (int idx = 0; idx < tags.length; idx++) {
        if (tags[idx].equalsIgnoreCase(npChunker.getContinueTag())) {
            // Current chunk (if any) keeps growing.
            continue;
        }
        // Any non-continue tag closes an open chunk...
        if (chunkStart != -1) {
            chunks.add(new Span(chunkStart, idx, "NP"));
        }
        // ...and a start tag opens a new one at this position.
        chunkStart = tags[idx].equalsIgnoreCase(npChunker.getStartTag()) ? idx : -1;
    }
    if (chunkStart != -1) {
        chunks.add(new Span(chunkStart, tags.length, "NP"));
    }
    return chunks.toArray(new Span[0]);
}
 
Example 37
Project: ViTA   File: OpenNlpParser.java   View source code 6 votes vote down vote up
/**
 * Recursively converts a parse node (and its children) into GATE
 * SyntaxTreeNode annotations, linking parents to children via "consists".
 *
 * @param p         the parse node to annotate
 * @param sentStart document offset of the sentence start
 * @return the annotation id, or -1 for token ("TK") nodes which are skipped
 */
private Integer annotate(Parse p, Long sentStart)
  throws gate.util.InvalidOffsetException {

  // Annotate children first so their ids can be attached to this node.
  List<Integer> childIDs = new ArrayList<Integer>();
  Parse[] children = p.getChildren();
  for(Parse cp : children) {
    Integer childID = annotate(cp, sentStart);
    if(childID >= 0) childIDs.add(childID);
  }

  String type = p.getType();
  if(type.equals("TK")) return -1;

  // Convert sentence-relative span to document offsets.
  Span span = p.getSpan();
  Long start = sentStart + span.getStart();
  Long end = sentStart + span.getEnd();

  FeatureMap fm = gate.Factory.newFeatureMap();
  String text = document.getContent().getContent(start, end).toString();
  fm.put("text", text);
  fm.put("cat", p.getType());
  if(!childIDs.isEmpty()) fm.put("consists", childIDs);

  return annotations.add(start, end, "SyntaxTreeNode", fm);
}
 
Example 38
Project: relex   File: DocSplitterOpenNLP15Impl.java   View source code 6 votes vote down vote up
/**
 * Get the next sentence out of the buffered text.
 * Return null if there are no complete sentences in the buffer.
 *
 * Side effects: advances the instance fields start/end via foundSentence,
 * and consumes the returned sentence from the buffer.
 */
public String getNextSentence()
{
	// punt if no sentence detector: hand back the whole buffer
	if (detector == null)
	{
		String rc = buffer;
		buffer = null;
		return rc;
	}

	Span spans[] = detector.sentPosDetect(buffer);
	if (0 == spans.length) return null;

	// Try each detected sentence end until foundSentence accepts one
	// (foundSentence is assumed to set trimmedEnd/trimmedSentence).
	start = 0;
	for (Span span : spans)
	{
		end = span.getEnd();
		if (foundSentence(buffer)) break;
	}
	if (!foundSentence(buffer)) return null;

	// Consume the accepted sentence from the buffer.
	buffer = buffer.substring(trimmedEnd);
	return trimmedSentence;
}
 
Example 39
Project: GettinCRAFTy   File: Tokenizer.java   View source code 6 votes vote down vote up
/**
 * Tokenizes the document text and adds one Token annotation per span,
 * carrying the covered string as a feature.
 */
public void execute() throws ExecutionException {
  AnnotationSet outputAS = document.getAnnotations(annotationSetName);

  String content = document.getContent().toString();

  Span[] tokenSpans = tokenizer.getTokens(content);
  try {
    for (Span tokenSpan : tokenSpans) {
      FeatureMap features = Factory.newFeatureMap();
      features.put(ANNIEConstants.TOKEN_STRING_FEATURE_NAME,
              content.substring(tokenSpan.getStart(), tokenSpan.getEnd()));

      outputAS.add((long) tokenSpan.getStart(), (long) tokenSpan.getEnd(),
              ANNIEConstants.TOKEN_ANNOTATION_TYPE, features);
    }
  } catch (Exception e) {
    throw new ExecutionException("error running tokenizer", e);
  }
}
 
Example 40
Project: ixa-pipe-pos   File: MultiWordSample.java   View source code 6 votes vote down vote up
public MultiWordSample(final String id, final String[] sentence,
    Span[] multiwords) {

  this.id = id;
  if (sentence == null) {
    throw new IllegalArgumentException("sentence must not be null!");
  }
  if (multiwords == null) {
    multiwords = new Span[0];
  }
  this.tokens = Collections.unmodifiableList(new ArrayList<String>(Arrays
      .asList(sentence)));
  this.names = Collections.unmodifiableList(new ArrayList<Span>(Arrays
      .asList(multiwords)));
  // TODO: Check that multiword spans are not overlapping, otherwise throw
  // exception
}