de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence Java Examples

The following examples show how to use de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a document containing a zero-width named entity that is located in the leading
 * whitespace, i.e. before the first token, and compares the output against the reference.
 */
@Test
public void testZeroWidthAnnotationBeforeFirstTokenIsMovedToBeginOfFirstToken() throws Exception
{
    JCas jcas = JCasFactory.createJCas();
    DocumentMetaData.create(jcas).setDocumentId("doc");

    // Two leading spaces: the first token only starts at offset 2.
    jcas.setDocumentText("  one two");

    Token tokenOne = new Token(jcas, 2, 5);
    tokenOne.addToIndexes();
    Token tokenTwo = new Token(jcas, 6, 9);
    tokenTwo.addToIndexes();
    Sentence sentence = new Sentence(jcas, 2, 9);
    sentence.addToIndexes();

    // Zero-width NE at offset 1, which lies before the begin of the first token. As the test
    // name says, the writer is expected to move it to the begin of the first token.
    NamedEntity zeroWidthEntity = new NamedEntity(jcas, 1, 1);
    zeroWidthEntity.addToIndexes();

    writeAndAssertEquals(jcas);
}
 
Example #2
Source File: HtmlAnnotationEditor.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the annotation layers of the current annotator state that are to be rendered.
 * <p>
 * A layer is skipped when it is disabled, when it is the {@link Token} or {@link Sentence}
 * layer, or when it is a chain layer while the editor is in automation, correction or
 * curation mode.
 *
 * @return the layers to render, in the order in which the state lists them.
 */
private List<AnnotationLayer> getLayersToRender()
{
    AnnotatorState state = getModelObject();
    List<AnnotationLayer> result = new ArrayList<>();

    for (AnnotationLayer candidate : state.getAnnotationLayers()) {
        String layerName = candidate.getName();
        boolean segmentation = layerName.equals(Token.class.getName())
                || layerName.equals(Sentence.class.getName());

        // Chain layers are only considered unsupported in the modes listed below
        boolean unsupported = false;
        if (candidate.getType().equals(CHAIN_TYPE)) {
            unsupported = state.getMode().equals(Mode.AUTOMATION)
                    || state.getMode().equals(Mode.CORRECTION)
                    || state.getMode().equals(Mode.CURATION);
        }

        if (!candidate.isEnabled() || segmentation || unsupported) {
            continue;
        }

        result.add(candidate);
    }

    return result;
}
 
Example #3
Source File: SpanRendererTest.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Renders a named entity spanning two sentences while cross-sentence annotations are
 * disabled on the layer and expects exactly one error comment attached to that entity.
 */
@Test
public void thatSpanCrossSentenceBehaviorOnRenderGeneratesErrors()
{
    neLayer.setCrossSentence(false);

    String text = StringUtils.repeat("a", 20);
    jcas.setDocumentText(text);

    // Two adjacent sentences and one entity that overlaps both of them
    Sentence firstSentence = new Sentence(jcas, 0, 10);
    firstSentence.addToIndexes();
    Sentence secondSentence = new Sentence(jcas, 10, 20);
    secondSentence.addToIndexes();
    NamedEntity ne = new NamedEntity(jcas, 5, 15);
    ne.addToIndexes();

    SpanAdapter adapter = new SpanAdapter(layerSupportRegistry, featureSupportRegistry, null,
            neLayer, () -> asList(), asList(new SpanCrossSentenceBehavior()));

    SpanRenderer sut = new SpanRenderer(adapter, layerSupportRegistry, featureSupportRegistry,
            asList(new SpanCrossSentenceBehavior()));

    VDocument vdoc = new VDocument();
    int windowEnd = jcas.getDocumentText().length();
    sut.render(jcas.getCas(), asList(), vdoc, 0, windowEnd);

    VComment expected = new VComment(ne, VCommentType.ERROR,
            "Crossing sentence boundaries is not permitted.");
    assertThat(vdoc.comments())
            .usingFieldByFieldElementComparator()
            .containsExactlyInAnyOrder(expected);
}
 
Example #4
Source File: Tokens.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the DKPro tokens found in the given JCas into a {@code Tokens} container.
 * <p>
 * Tokens are visited sentence by sentence. Each resulting token receives a running token
 * number, the number of the sentence it was found in (both counted from 0) and the
 * part-of-speech value obtained via {@code CoreNlpUtils.tokenToWord}.
 *
 * @param jCas the JCas to read sentences and tokens from.
 * @return the collected tokens.
 */
public static Tokens getTokensFromJCas(JCas jCas) {
  Tokens result = new Tokens();
  // Sentence and token numbers are counted here while iterating
  int sentenceIndex = 0;
  int tokenIndex = 0;
  for (Sentence sentence : select(jCas, Sentence.class)) {
    for (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token dkproToken : selectCovered(jCas,
        de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token.class, sentence)) {
      // The detour via CoreLabel is only used to obtain the POS value (original author's note:
      // ideally DKPro tokens would be transformed directly, the problem being the POS mappings).
      CoreLabel label = CoreNlpUtils.tokenToWord(dkproToken);
      Token aidaToken = new Token(tokenIndex, dkproToken.getCoveredText(), dkproToken.getBegin(),
          dkproToken.getEnd(), 0);
      aidaToken.setPOS(label.get(CoreAnnotations.PartOfSpeechAnnotation.class));
      aidaToken.setSentence(sentenceIndex);
      result.addToken(aidaToken);
      tokenIndex++;
    }
    sentenceIndex++;
  }
  return result;
}
 
Example #5
Source File: StanfordTokenizer.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the Stanford CoreNLP pipeline registered for the document language over the document
 * text and adds a {@link Sentence} annotation per detected sentence and a {@link Token}
 * annotation per detected token to the JCas.
 *
 * @param aJCas the JCas to annotate.
 * @throws AnalysisEngineProcessException if the document language is not a key of
 *         {@code languageMap}.
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  String text = aJCas.getDocumentText();
  Annotation document = new Annotation(text);

  String language = aJCas.getDocumentLanguage();
  if (!languageMap.containsKey(language)) {
    throw new AnalysisEngineProcessException(new LanguageNotSupportedException("Language Not Supported"));
  }

  // languageMap yields the position of the language-specific pipeline in the array
  StanfordCoreNLP pipeline = stanfordCoreNLPs[languageMap.get(language)];
  pipeline.annotate(document);

  for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
    int sentenceBegin = sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    int sentenceEnd = sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    new Sentence(aJCas, sentenceBegin, sentenceEnd).addToIndexes();

    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      int tokenBegin = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
      int tokenEnd = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
      new Token(aJCas, tokenBegin, tokenEnd).addToIndexes();
    }
  }
}
 
Example #6
Source File: LineOrientedTextReaderTest.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Reads {@code LICENSE.txt} with the {@link LineOrientedTextReader} and checks the number of
 * sentences and tokens found in the resulting document.
 */
@Test
public void test()
    throws Exception
{
    JCas doc = JCasFactory.createJCas();

    CollectionReader reader = createReader(LineOrientedTextReader.class,
            LineOrientedTextReader.PARAM_SOURCE_LOCATION, "LICENSE.txt");
    reader.getNext(doc.getCas());

    // 169 sentences are expected ...
    int sentenceCount = select(doc, Sentence.class).size();
    assertEquals(169, sentenceCount);

    // ... but no tokens at all
    int tokenCount = select(doc, Token.class).size();
    assertEquals(0, tokenCount);
}
 
Example #7
Source File: OpenNlpNerRecommender.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Builds name samples from the sentences of the given CASes.
 * <p>
 * Only samples containing at least one name are kept. Collection stops as soon as the number
 * of collected samples has reached the training set size limit configured in the traits.
 *
 * @param aCasses the CASes to extract the samples from.
 * @return the collected samples.
 */
private List<NameSample> extractNameSamples(List<CAS> aCasses)
{
    List<NameSample> samples = new ArrayList<>();

    for (CAS cas : aCasses) {
        Type sentenceType = getType(cas, Sentence.class);
        Type tokenType = getType(cas, Token.class);
        Map<AnnotationFS, List<AnnotationFS>> tokensBySentence = indexCovered(
                cas, sentenceType, tokenType);

        for (Entry<AnnotationFS, List<AnnotationFS>> entry : tokensBySentence.entrySet()) {
            // Limit reached: no need to look at further sentences or CASes
            if (samples.size() >= traits.getTrainingSetSizeLimit()) {
                return samples;
            }

            AnnotationFS sentence = entry.getKey();
            Collection<AnnotationFS> tokens = entry.getValue();
            NameSample sample = createNameSample(cas, sentence, tokens);
            if (sample.getNames().length > 0) {
                samples.add(sample);
            }
        }
    }

    return samples;
}
 
Example #8
Source File: AbstractArgumentHITCreator.java    From argument-reasoning-comprehension-task with Apache License 2.0 6 votes vote down vote up
/**
 * Creates one {@code HITSentence} per {@link Sentence} annotation of the argument.
 * <p>
 * Each entry carries the zero-based position of the sentence, an id obtained from
 * {@code StandaloneArgument.getSentenceID} for the argument and that position, and the
 * covered text of the sentence.
 *
 * @param argument the argument whose sentences are extracted.
 * @return the extracted sentences in iteration order.
 * @throws IOException declared by the signature; presumably raised when obtaining the JCas
 *         of the argument (not visible here).
 */
public static List<HITSentence> extractSentences(StandaloneArgument argument)
        throws IOException
{
    List<HITSentence> hitSentences = new ArrayList<>();

    int position = 0;
    for (Sentence sentence : JCasUtil.select(argument.getJCas(), Sentence.class)) {
        HITSentence hit = new HITSentence();
        hit.position = position;
        // the id combines the argument with the sentence position
        hit.sentenceId = StandaloneArgument.getSentenceID(argument, hit.position);
        hit.text = sentence.getCoveredText();
        hitSentences.add(hit);

        position++;
    }

    return hitSentences;
}
 
Example #9
Source File: SuggestionViewPanel.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Renders the given CAS and returns the result as a JSON string.
 * <p>
 * Disabled layers, the {@link Token} and {@link Sentence} layers and chain layers are left
 * out. The remaining layers are pre-rendered into a {@code VDocument} for the window offsets
 * of the annotator state, which is then rendered into a {@code GetDocumentResponse}.
 *
 * @param aCas the CAS to render.
 * @param aBratAnnotatorModel the annotator state supplying layers and window offsets.
 * @param aCurationColoringStrategy the coloring strategy handed to the renderer.
 * @return the JSON representation of the response.
 * @throws IOException declared by the signature; presumably raised during JSON
 *         serialization (not visible here).
 */
private String render(CAS aCas, AnnotatorState aBratAnnotatorModel,
        ColoringStrategy aCurationColoringStrategy)
    throws IOException
{
    List<AnnotationLayer> renderedLayers = new ArrayList<>();
    for (AnnotationLayer candidate : aBratAnnotatorModel.getAnnotationLayers()) {
        String layerName = candidate.getName();
        boolean segmentation = layerName.equals(Token.class.getName())
                || layerName.equals(Sentence.class.getName());
        boolean chain = candidate.getType().equals(CHAIN_TYPE);

        if (!candidate.isEnabled() || segmentation || chain) {
            continue;
        }
        renderedLayers.add(candidate);
    }

    int windowBegin = aBratAnnotatorModel.getWindowBeginOffset();
    int windowEnd = aBratAnnotatorModel.getWindowEndOffset();
    VDocument vdoc = new VDocument();
    preRenderer.render(vdoc, windowBegin, windowEnd, aCas, renderedLayers);

    GetDocumentResponse response = new GetDocumentResponse();
    new BratRenderer(schemaService, coloringService).render(response, aBratAnnotatorModel,
            vdoc, aCas, aCurationColoringStrategy);

    return JSONUtil.toInterpretableJsonString(response);
}
 
Example #10
Source File: SuggestionBuilder.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Records the sentences of the given CAS that are covered by the given window.
 * <p>
 * The window is stored as the current diff range. For every covered sentence, its begin
 * offset is mapped to its end offset, to its sentence number and - in a fresh per-user map
 * registered under the given user name - to its address in the CAS.
 *
 * @param aBratAnnotatorModel the annotator state (not used in this method body).
 * @param aIdxSentenceBeginEnd receives sentence begin to sentence end offsets.
 * @param aIdxSentenceBeginNumber receives sentence begin offset to sentence number.
 * @param aSegmentAdress receives, per user name, sentence begin offset to sentence address.
 * @param aCas the CAS to read the sentences from.
 * @param aUsername the user name under which the addresses are registered.
 * @param aWindowStart begin offset of the window.
 * @param aWindowEnd end offset of the window.
 */
private void updateSegment(AnnotatorState aBratAnnotatorModel,
        Map<Integer, Integer> aIdxSentenceBeginEnd,
        Map<Integer, Integer> aIdxSentenceBeginNumber,
        Map<String, Map<Integer, Integer>> aSegmentAdress, CAS aCas, String aUsername,
        int aWindowStart, int aWindowEnd)
{
    diffRangeBegin = aWindowStart;
    diffRangeEnd = aWindowEnd;

    // Look up the number of the first sentence once; subsequent sentences simply count up
    int sentenceNumber = WebAnnoCasUtil.getSentenceNumber(aCas, diffRangeBegin);

    Map<Integer, Integer> userSegments = new HashMap<>();
    aSegmentAdress.put(aUsername, userSegments);

    Type sentenceType = CasUtil.getType(aCas, Sentence.class);
    for (AnnotationFS sentence : selectCovered(aCas, sentenceType, diffRangeBegin,
            diffRangeEnd)) {
        int begin = sentence.getBegin();
        aIdxSentenceBeginEnd.put(begin, sentence.getEnd());
        aIdxSentenceBeginNumber.put(begin, sentenceNumber);
        userSegments.put(begin, getAddr(sentence));
        sentenceNumber++;
    }
}
 
Example #11
Source File: SpanAdapterTest.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an annotation covering the whole two-sentence document while cross-sentence
 * annotations are allowed, then forbids them and expects validation to report one error.
 */
@Test
public void thatSpanCrossSentenceBehaviorOnValidateReturnsErrorMessage()
    throws AnnotationException
{
    new TokenBuilder<Token, Sentence>(Token.class, Sentence.class)
            .buildTokens(jcas, "This is a test .\nThis is sentence two .");

    SpanAdapter sut = new SpanAdapter(layerSupportRegistry, featureSupportRegistry, null,
            neLayer, () -> asList(), behaviors);

    // Create one annotation from the start to the end of the document text
    neLayer.setCrossSentence(true);
    int documentEnd = jcas.getDocumentText().length();
    sut.add(document, username, jcas.getCas(), 0, documentEnd);

    // With cross-sentence annotations forbidden, validation has to yield a single error.
    // Source and message of the error are ignored in the comparison.
    neLayer.setCrossSentence(false);
    assertThat(sut.validate(jcas.getCas()))
            .extracting(Pair::getLeft)
            .usingElementComparatorIgnoringFields("source", "message")
            .containsExactly(LogMessage.error(null, ""));
}
 
Example #12
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a JCas for the given text that contains the tokens produced by the token builder
 * and exactly one sentence spanning the entire document text.
 *
 * @param aText the text handed to the token builder.
 * @return the prepared JCas.
 * @throws UIMAException declared by the signature; raised by the called helpers.
 */
private static JCas makeJCasOneSentence(String aText) throws UIMAException
{
    JCas jcas = makeJCas();

    new TokenBuilder<Token, Sentence>(Token.class, Sentence.class).buildTokens(jcas, aText);

    // The token builder treats line breaks as sentence breaks, so discard whatever sentences
    // it has generated ...
    for (Sentence generated : select(jcas, Sentence.class)) {
        generated.removeFromIndexes();
    }

    // ... and replace them by a single sentence over the full text
    int textLength = jcas.getDocumentText().length();
    Sentence whole = new Sentence(jcas, 0, textLength);
    whole.addToIndexes();

    return jcas;
}
 
Example #13
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a document whose only token carries a {@code POS_NOUN} annotation and compares the
 * output against the reference.
 */
@Test
public void testElevatedType() throws Exception
{
    JCas jcas = JCasFactory.createJCas();
    DocumentMetaData.create(jcas).setDocumentId("doc");
    jcas.setDocumentText("John");

    // POS_NOUN is an elevated type, i.e. not a direct subtype of Annotation. According to the
    // original author, such a type is not picked up by the schema analyzer but should still
    // be serialized as the POS type, which is picked up.
    POS_NOUN noun = new POS_NOUN(jcas, 0, 4);
    noun.setPosValue("NN");
    noun.setCoarseValue("NOUN");
    noun.addToIndexes();

    Token token = new Token(jcas, 0, 4);
    token.setPos(noun);
    token.addToIndexes();

    Sentence sentence = new Sentence(jcas, 0, 4);
    sentence.addToIndexes();

    writeAndAssertEquals(jcas);
}
 
Example #14
Source File: EntityHeadingFeature.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Produces a single binary feature telling whether the first offset lies in the heading.
 * <p>
 * The heading is taken to end at the end of the second sentence. If the document does not
 * have more than two sentences, the heading end stays at 0 and the feature value is 0.0
 * unless the first offset is negative.
 *
 * @param jCas the JCas to read the sentences from.
 * @return a map holding the feature id mapped to 1.0 (in heading) or 0.0 (not in heading).
 */
@Override
public Map<Integer, Double> extract(JCas jCas) {
  Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);

  int headingEnd = 0;
  if (sentences.size() > 2) {
    // Skip the first sentence, the second one marks the end of the heading
    Iterator<Sentence> cursor = sentences.iterator();
    cursor.next();
    Sentence second = cursor.next();
    headingEnd = second.getEnd();
  }

  double value;
  if (getFirstOffset() < headingEnd) {
    value = 1.0;
  } else {
    value = 0.0;
  }

  Map<Integer, Double> features = new HashMap<>();
  features.put(getId(), value);
  return features;
}
 
Example #15
Source File: ConstraintsGeneratorTest.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a JCas based on the default type system merged with the test types loaded from
 * {@code src/test/resources/desc/types/webannoTestTypes.xml}, sets the document id and lets
 * the token builder create tokens and sentences for a short test text.
 *
 * @return the prepared JCas.
 * @throws UIMAException declared by the signature; raised by the called UIMA helpers.
 */
private JCas makeJCasOneSentence() throws UIMAException
{
    TypeSystemDescription builtInTypes = TypeSystemDescriptionFactory
            .createTypeSystemDescription();
    TypeSystemDescription testTypes = TypeSystemDescriptionFactory
            .createTypeSystemDescriptionFromPath(
                    "src/test/resources/desc/types/webannoTestTypes.xml");

    JCas jcas = JCasFactory
            .createJCas(CasCreationUtils.mergeTypeSystems(asList(builtInTypes, testTypes)));
    DocumentMetaData.create(jcas).setDocumentId("doc");

    new TokenBuilder<Token, Sentence>(Token.class, Sentence.class)
            .buildTokens(jcas, "This is a test .");

    return jcas;
}
 
Example #16
Source File: AnnotationEditorBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Determines which annotation layers of the current annotator state are rendered.
 * <p>
 * Left out are disabled layers, the {@link Token} and {@link Sentence} layers, and chain
 * layers while the mode is automation, correction or curation.
 *
 * @return the layers to render, in the order in which the state lists them.
 */
private List<AnnotationLayer> getLayersToRender()
{
    AnnotatorState state = getModelObject();
    List<AnnotationLayer> selected = new ArrayList<>();

    for (AnnotationLayer current : state.getAnnotationLayers()) {
        String name = current.getName();
        boolean isSegmentation = name.equals(Token.class.getName())
                || name.equals(Sentence.class.getName());

        // The mode only matters for chain layers
        boolean isUnsupported = false;
        if (current.getType().equals(CHAIN_TYPE)) {
            isUnsupported = state.getMode().equals(Mode.AUTOMATION)
                    || state.getMode().equals(Mode.CORRECTION)
                    || state.getMode().equals(Mode.CURATION);
        }

        if (!current.isEnabled() || isSegmentation || isUnsupported) {
            continue;
        }

        selected.add(current);
    }

    return selected;
}
 
Example #17
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a document with a named entity that starts in the whitespace between two tokens
 * and compares the output against the reference.
 */
@Test
public void testAnnotationWithLeadingWhitespace() throws Exception
{
    JCas jcas = JCasFactory.createJCas();
    DocumentMetaData.create(jcas).setDocumentId("doc");

    // Two spaces separate the tokens
    jcas.setDocumentText("one  two");

    Token tokenOne = new Token(jcas, 0, 3);
    tokenOne.addToIndexes();
    Token tokenTwo = new Token(jcas, 5, 8);
    tokenTwo.addToIndexes();
    Sentence sentence = new Sentence(jcas, 0, 8);
    sentence.addToIndexes();

    // The NE begins one character before the second token, i.e. it has leading whitespace
    // which is expected to be silently dropped on export
    NamedEntity entity = new NamedEntity(jcas, 4, 8);
    entity.addToIndexes();

    writeAndAssertEquals(jcas);
}
 
Example #18
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a document with a named entity that starts in the whitespace preceding the first
 * token and compares the output against the reference.
 */
@Test
public void testAnnotationWithLeadingWhitespaceAtStart() throws Exception
{
    JCas jcas = JCasFactory.createJCas();
    DocumentMetaData.create(jcas).setDocumentId("doc");

    // The document text starts with a space
    jcas.setDocumentText(" one two");

    Token tokenOne = new Token(jcas, 1, 4);
    tokenOne.addToIndexes();
    Token tokenTwo = new Token(jcas, 5, 8);
    tokenTwo.addToIndexes();
    Sentence sentence = new Sentence(jcas, 1, 8);
    sentence.addToIndexes();

    // The NE begins at offset 0, i.e. it includes the leading whitespace which is expected
    // to be silently dropped on export
    NamedEntity entity = new NamedEntity(jcas, 0, 4);
    entity.addToIndexes();

    writeAndAssertEquals(jcas);
}
 
Example #19
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a document with a named entity that extends into the whitespace following the last
 * token and compares the output against the reference.
 */
@Test
public void testAnnotationWithTrailingWhitespaceAtEnd() throws Exception
{
    JCas jcas = JCasFactory.createJCas();
    DocumentMetaData.create(jcas).setDocumentId("doc");

    // The document text ends with a space
    jcas.setDocumentText("one two ");

    Token tokenOne = new Token(jcas, 0, 3);
    tokenOne.addToIndexes();
    Token tokenTwo = new Token(jcas, 4, 7);
    tokenTwo.addToIndexes();
    Sentence sentence = new Sentence(jcas, 0, 7);
    sentence.addToIndexes();

    // The NE ends at offset 8, i.e. it includes the trailing whitespace which is expected to
    // be silently dropped on export
    NamedEntity entity = new NamedEntity(jcas, 4, 8);
    entity.addToIndexes();

    writeAndAssertEquals(jcas);
}
 
Example #20
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a document consisting of two single-token sentences that directly follow each
 * other without any separating character and compares the output against the reference.
 */
@Test
public void testTwoSentencesWithNoSpaceInBetween() throws Exception
{
    TypeSystemDescription builtInTypes = TypeSystemDescriptionFactory
            .createTypeSystemDescription();
    TypeSystemDescription testTypes = TypeSystemDescriptionFactory
            .createTypeSystemDescriptionFromPath(
                    "src/test/resources/desc/type/webannoTestTypes.xml");

    JCas jcas = JCasFactory
            .createJCas(CasCreationUtils.mergeTypeSystems(asList(builtInTypes, testTypes)));
    DocumentMetaData.create(jcas).setDocumentId("doc");
    jcas.setDocumentText("onetwo");

    // First sentence: "one"
    Token tokenOne = new Token(jcas, 0, 3);
    tokenOne.addToIndexes();
    Sentence sentenceOne = new Sentence(jcas, 0, 3);
    sentenceOne.addToIndexes();

    // Second sentence: "two", beginning exactly where the first one ends
    Token tokenTwo = new Token(jcas, 3, 6);
    tokenTwo.addToIndexes();
    Sentence sentenceTwo = new Sentence(jcas, 3, 6);
    sentenceTwo.addToIndexes();

    writeAndAssertEquals(jcas);
}
 
Example #21
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a document with a zero-width custom span located exactly between two adjacent
 * tokens and compares the output against the reference.
 */
@Test
public void testZeroLengthSpanBetweenAdjacentTokens() throws Exception
{
    JCas jcas = makeJCas();
    jcas.setDocumentText("word.");

    // "word" and "." touch each other at offset 4
    Token word = new Token(jcas, 0, 4);
    word.addToIndexes();
    Token period = new Token(jcas, 4, 5);
    period.addToIndexes();
    Sentence sentence = new Sentence(jcas, 0, 5);
    sentence.addToIndexes();

    CAS cas = jcas.getCas();
    Type simpleSpanType = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");

    // Zero-width annotation at the end of the first token / begin of the second token
    AnnotationFS zeroWidthSpan = cas.createAnnotation(simpleSpanType, 4, 4);
    cas.addFsToIndexes(zeroWidthSpan);

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList("webanno.custom.SimpleSpan"));
}
 
Example #22
Source File: WebannoTsv3Reader.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Appends the text of a sentence to the document text being assembled in
 * {@code coveredText} and adds a {@link Sentence} annotation to the CAS.
 *
 * @param aJCas the JCas to add the sentence annotation to.
 * @param aLine the sentence text; it is appended followed by a line feed.
 * @param aBegin begin offset of the sentence.
 * @param aEnd end offset of the sentence.
 * @param aPrevEnd end offset of the previous sentence.
 */
private void createSentence(JCas aJCas, String aLine, int aBegin, int aEnd, int aPrevEnd)
{
    // If the next sentence immediately follows the last one without any space or line break
    // in between, then we need to chop off again the linebreak that we added at the end of the
    // last sentence - otherwise offsets will be off on a round-trip.
    if (aPrevEnd == aBegin && coveredText.length() > 0
            && (coveredText.charAt(coveredText.length() - 1) == '\n')) {
        coveredText.deleteCharAt(coveredText.length() - 1);
    }

    // If there is more than one character between the end of the previous sentence and the
    // begin of this one, fill the gap with spaces. The padding is appended directly to the
    // document text; the loop does not execute when there is no such gap.
    for (int i = aPrevEnd + 1; i < aBegin; i++) {
        coveredText.append(' ');
    }
    coveredText.append(aLine).append(LF);

    Sentence sentence = new Sentence(aJCas, aBegin, aEnd);
    sentence.addToIndexes();
}
 
Example #23
Source File: WebannoTsv3Writer.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an {@code AnnotationUnit} for every token of every sentence in the given JCas.
 * <p>
 * Each unit is added to {@code units} and registered in {@code unitsLineNumber} under the
 * label {@code <sentence number>-<token number>}, both numbers counted from 1. The unit of
 * the first token of a sentence is additionally mapped to the sentence text in
 * {@code sentenceUnits}.
 *
 * @param aJCas the JCas to read sentences and tokens from.
 */
private void setTokenSentenceAddress(JCas aJCas)
{
    int sentenceNumber = 0;
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        sentenceNumber++;

        int tokenNumber = 0;
        for (Token token : selectCovered(Token.class, sentence)) {
            tokenNumber++;

            AnnotationUnit unit = new AnnotationUnit(token.getBegin(), token.getEnd(), false,
                    token.getCoveredText());
            units.add(unit);

            // The first unit of a sentence also carries the sentence text
            boolean firstInSentence = tokenNumber == 1;
            if (firstInSentence) {
                sentenceUnits.put(unit, sentence.getCoveredText());
            }

            unitsLineNumber.put(unit, sentenceNumber + "-" + tokenNumber);
        }
    }
}
 
Example #24
Source File: SentenceConstraintsFilter.java    From argument-reasoning-comprehension-task with Apache License 2.0 6 votes vote down vote up
/**
 * Decides whether an argument is kept.
 * <p>
 * An argument is rejected if it consists of exactly one sentence or if the text of any of
 * its sentences is longer than {@code MAX_SENTENCE_LENGTH} characters.
 *
 * @param jCas the JCas holding the argument.
 * @return {@code true} if the argument is kept, {@code false} if it is rejected.
 */
@Override
public boolean keepArgument(JCas jCas)
{
    List<Sentence> sentences = new ArrayList<>(JCasUtil.select(jCas, Sentence.class));
    final int count = sentences.size();

    // one-sentence arguments are dropped
    if (count == 1) {
        return false;
    }

    // so are arguments containing an overly long sentence
    for (int i = 0; i < count; i++) {
        String text = sentences.get(i).getCoveredText();
        if (text.length() > MAX_SENTENCE_LENGTH) {
            return false;
        }
    }

    return true;
}
 
Example #25
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a JCas with a fixed two-line text and lets the token builder create tokens and
 * sentences for it. Asserts that this resulted in exactly two sentences.
 *
 * @return the prepared JCas.
 * @throws UIMAException declared by the signature; raised by the called helpers.
 */
private static JCas makeJCasTwoSentences() throws UIMAException
{
    JCas jcas = makeJCas();

    new TokenBuilder<Token, Sentence>(Token.class, Sentence.class)
            .buildTokens(jcas, "He loves her .\nShe loves him not .");

    // Sanity check: one sentence per line
    int sentenceCount = select(jcas, Sentence.class).size();
    assertEquals(2, sentenceCount);

    return jcas;
}
 
Example #26
Source File: WebannoTsv1Reader.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Adds {@link Sentence} annotations to the CAS, one per entry of
 * {@code firstTokenInSentence}.
 * <p>
 * Sentence offsets are derived from the offsets of the tokens in {@code tokensStored}, which
 * are looked up under keys of the form {@code "t_" + number}.
 *
 * @param aJCas
 *            the JCas to add the sentences to.
 * @param firstTokenInSentence
 *            token numbers used to look up the boundary tokens of the sentences. NOTE(review):
 *            the name suggests the number of the first token of each sentence, but except for
 *            the first entry the code reads the END offset of the referenced token - confirm
 *            what the caller actually stores here.
 * @param tokensStored
 *            the tokens by key. The last token of the document is looked up as
 *            {@code "t_" + tokensStored.size()}, so this presumably assumes keys numbered
 *            consecutively from 1 - verify against the caller.
 */
private void createSentence(JCas aJCas, List<Integer> firstTokenInSentence,
        Map<String, Token> tokensStored)
{
    for (int i = 0; i < firstTokenInSentence.size(); i++) {
        Sentence outSentence = new Sentence(aJCas);
        // Only last sentence, and not the only sentence in the document (i!=0)
        if (i == firstTokenInSentence.size() - 1 && i != 0) {
            // Spans from the end of the token referenced by the last entry to the end of the
            // last stored token.
            // NOTE(review): unlike the "middle sentence" branch below, no +1 is added to the
            // begin offset here - confirm whether this asymmetry is intended.
            outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd());
            outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd());
            outSentence.addToIndexes();
            break;
        }
        // The only sentence in the document: spans from the begin of the referenced token to
        // the end of the last stored token
        if (i == firstTokenInSentence.size() - 1 && i == 0) {
            outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i))
                    .getBegin());
            outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd());
            outSentence.addToIndexes();
        }
        // First of several sentences: spans from the begin of the token referenced by this
        // entry to the end of the token referenced by the next entry
        else if (i == 0) {
            outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i))
                    .getBegin());
            outSentence.setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1))
                    .getEnd());
            outSentence.addToIndexes();
        }
        // Middle sentence: begins one character after the end of the token referenced by this
        // entry and ends at the end of the token referenced by the next entry
        else {
            outSentence.setBegin(
                    tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd() + 1);
            outSentence
                    .setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1)).getEnd());
            outSentence.addToIndexes();
        }
    }
}
 
Example #27
Source File: RelationAdapterTest.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a relation between the POS annotations of the first two tokens and expects the
 * source and target features of the new relation to point to these two tokens.
 */
@Test
public void thatRelationAttachmentBehaviorOnCreateWorks() throws Exception
{
    new TokenBuilder<Token, Sentence>(Token.class, Sentence.class)
            .buildTokens(jcas, "This is a test .");

    // Attach a POS annotation to every token
    for (Token token : select(jcas, Token.class)) {
        POS tokenPos = new POS(jcas, token.getBegin(), token.getEnd());
        token.setPos(tokenPos);
        tokenPos.addToIndexes();
    }

    RelationAdapter sut = new RelationAdapter(layerSupportRegistry, featureSupportRegistry,
        null, depLayer, FEAT_REL_TARGET, FEAT_REL_SOURCE,
        () -> asList(dependencyLayerGovernor, dependencyLayerDependent), behaviors);

    List<POS> allPos = new ArrayList<>(select(jcas, POS.class));
    List<Token> allTokens = new ArrayList<>(select(jcas, Token.class));

    // The relation is created between the POS annotations ...
    POS firstPos = allPos.get(0);
    POS secondPos = allPos.get(1);
    AnnotationFS relation = sut.add(document, username, firstPos, secondPos, jcas.getCas());

    // ... but its end points are expected to be the tokens
    Token firstToken = allTokens.get(0);
    Token secondToken = allTokens.get(1);
    assertThat(FSUtil.getFeature(relation, FEAT_REL_SOURCE, Token.class))
            .isEqualTo(firstToken);
    assertThat(FSUtil.getFeature(relation, FEAT_REL_TARGET, Token.class))
            .isEqualTo(secondToken);
}
 
Example #28
Source File: SpanAdapterTest.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Adds an annotation on the first word and then tries to add a second annotation starting at
 * the same offset under each overlap mode of the layer, checking which modes reject the
 * second annotation and which accept it.
 */
@Test
public void thatSpanAnchoringAndOverlapBehaviorsWorkInConcert() throws AnnotationException
{
    TokenBuilder<Token, Sentence> builder = new TokenBuilder<>(Token.class, Sentence.class);
    builder.buildTokens(jcas, "This is a test .");

    SpanAdapter sut = new SpanAdapter(layerSupportRegistry, featureSupportRegistry, null,
            neLayer, () -> asList(), behaviors);

    // First time should work - we annotate the whole word "This"
    neLayer.setOverlapMode(ANY_OVERLAP);
    sut.add(document, username, jcas.getCas(), 0, 4);
    
    // Adding another annotation for the range [0-1] DOES NOT work in the following two modes.
    // NOTE(review): the requested range is only the first character; presumably the
    // anchoring behavior configured in "behaviors" expands it to the whole token so that
    // it ends up at the same place as the first annotation - confirm.
    neLayer.setOverlapMode(NO_OVERLAP);
    assertThatExceptionOfType(AnnotationException.class)
            .isThrownBy(() -> sut.add(document, username, jcas.getCas(), 0, 1))
            .withMessageContaining("no overlap or stacking");
    
    neLayer.setOverlapMode(OVERLAP_ONLY);
    assertThatExceptionOfType(AnnotationException.class)
            .isThrownBy(() -> sut.add(document, username, jcas.getCas(), 0, 1))
            .withMessageContaining("stacking is not allowed");
    
    // Adding another annotation for the range [0-1] DOES work in the following two modes
    neLayer.setOverlapMode(STACKING_ONLY);
    assertThatCode(() -> sut.add(document, username, jcas.getCas(), 0, 1))
            .doesNotThrowAnyException();
    
    neLayer.setOverlapMode(ANY_OVERLAP);
    assertThatCode(() -> sut.add(document, username, jcas.getCas(), 0, 1))
            .doesNotThrowAnyException();
}
 
Example #29
Source File: SpanAdapterTest.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the same annotation twice while any overlap is permitted and then validates the CAS
 * under each overlap mode of the layer, checking which modes report errors.
 */
@Test
public void thatSpanOverlapBehaviorOnValidateGeneratesErrors() throws AnnotationException
{
    TokenBuilder<Token, Sentence> builder = new TokenBuilder<>(Token.class, Sentence.class);
    builder.buildTokens(jcas, "This is a test .");

    SpanAdapter sut = new SpanAdapter(layerSupportRegistry, featureSupportRegistry, null,
            neLayer, () -> asList(), behaviors);

    // Add two annotations for the same range. The range requested is [0-1] while the error
    // messages expected below refer to [0-4].
    neLayer.setOverlapMode(ANY_OVERLAP);
    sut.add(document, username, jcas.getCas(), 0, 1);
    sut.add(document, username, jcas.getCas(), 0, 1);

    // Validation succeeds in the modes that permit stacking
    neLayer.setOverlapMode(ANY_OVERLAP);
    assertThat(sut.validate(jcas.getCas()))
            .isEmpty();

    neLayer.setOverlapMode(STACKING_ONLY);
    assertThat(sut.validate(jcas.getCas()))
            .isEmpty();

    // Validation fails in the remaining modes: one error is expected per annotation, the
    // "source" field of the messages is ignored in the comparison
    neLayer.setOverlapMode(OVERLAP_ONLY);
    assertThat(sut.validate(jcas.getCas()))
            .extracting(Pair::getLeft)
            .usingElementComparatorIgnoringFields("source")
            .containsExactly(
                    LogMessage.error(null, "Stacked annotation at [0-4]"),
                    LogMessage.error(null, "Stacked annotation at [0-4]"));

    neLayer.setOverlapMode(NO_OVERLAP);
    assertThat(sut.validate(jcas.getCas()))
            .extracting(Pair::getLeft)
            .usingElementComparatorIgnoringFields("source")
            .containsExactly(
                    LogMessage.error(null, "Stacked annotation at [0-4]"),
                    LogMessage.error(null, "Stacked annotation at [0-4]"));
}
 
Example #30
Source File: KnowNER.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles and instantiates the KnowNER analysis engine for the given language.
 * <p>
 * The aggregate consists of the dictionary match and dictionary feature annotators, the
 * local-features classifier (using {@link Sentence} as sequence and {@link Token} as unit
 * annotation) and the post-classification annotator. If the model is {@code "NED"}, the
 * BMEOW type annotator and the named entity removal annotator are put in front.
 *
 * @param language the language passed to the annotators and used to locate the model.
 * @return the analysis engine, produced with the resource manager of the current context.
 * @throws ResourceInitializationException if an engine description or the engine itself
 *         cannot be created.
 * @throws SQLException declared by the signature; presumably raised while resolving the
 *         model path or settings (not visible here).
 */
private AnalysisEngine initializeClassifier(String language) throws ResourceInitializationException, SQLException {
    logger.debug("Initializing KnowNER for '" + language + "'.");

    long start = System.currentTimeMillis();

    List<AnalysisEngineDescription> descriptions = new ArrayList<>();

    if (model.equals("NED")) {
        descriptions.add(AnalysisEngineFactory.createEngineDescription(BmeowTypeAnnotator.class,
                BmeowTypeAnnotator.GOLD, false));
        descriptions.add(AnalysisEngineFactory.createEngineDescription(RemoveNamedEntityAnnotator.class));
    }
    descriptions.add(AnalysisEngineFactory.createEngineDescription(DictionaryMatchAnnotator.class,
            DictionaryMatchAnnotator.PARAM_LANGUAGE, language));
    descriptions.add(AnalysisEngineFactory.createEngineDescription(DictionaryFeaturesAnnotator.class,
            DictionaryFeaturesAnnotator.PARAM_LANGUAGE, language));

    descriptions.add(createEngineDescription(LocalFeaturesTcAnnotator.class,
            TcAnnotator.PARAM_TC_MODEL_LOCATION, KnowNERSettings.getModelPath(language, model).toFile(),
            LocalFeaturesTcAnnotator.PARAM_LANGUAGE, language,
            TcAnnotator.PARAM_NAME_SEQUENCE_ANNOTATION, Sentence.class.getName(),
            TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName()));

    descriptions.add(createEngineDescription(KnowNERNamedEntityPostClassificationBMEOWAnnotator.class));

    // The aggregate factory method takes the delegate descriptions as an array
    AnalysisEngineDescription[] analysisEngineDescriptions =
            descriptions.toArray(new AnalysisEngineDescription[0]);

    ResourceManager mgr = ((UimaContextAdmin) getContext()).getResourceManager();

    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(
            createEngineDescription(analysisEngineDescriptions), mgr, null);

    // Duration is reported in whole seconds
    long dur = System.currentTimeMillis() - start;
    logger.info("Initialized KnowNER-" + language + " in " + dur/1000 + "s.");

    return ae;
}