Java Code Examples for de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token#addToIndexes()

The following examples show how to use de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token#addToIndexes(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: StanfordTokenizer.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  String text = aJCas.getDocumentText();
  Annotation document = new Annotation(text);
  StanfordCoreNLP stanfordCoreNLP;

  if(!languageMap.containsKey(aJCas.getDocumentLanguage())) {
    throw new AnalysisEngineProcessException(new LanguageNotSupportedException("Language Not Supported"));
  }

  stanfordCoreNLP = stanfordCoreNLPs[languageMap.get(aJCas.getDocumentLanguage())];

  stanfordCoreNLP.annotate(document);
  List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    int sstart = sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    int ssend = sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    Sentence jsentence = new Sentence(aJCas, sstart, ssend);
    jsentence.addToIndexes();

    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      Token casToken = new Token(aJCas, token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class), token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
      casToken.addToIndexes();
    }
  }
}
 
Example 2
Source File: Step0bTextSegmenterA.java    From argument-reasoning-comprehension-task with Apache License 2.0 6 votes vote down vote up
/**
 * Recreates every {@link Paragraph} and {@link Token} span of {@code source} in
 * {@code target} and adds the new annotations to the target indexes. Only begin and end
 * offsets are carried over.
 *
 * @param source the CAS to read paragraphs and tokens from
 * @param target the CAS to create the copies in
 * @throws IllegalArgumentException if the two CASes do not hold equal document text
 */
private static void copyParagraphAndTokenAnnotations(JCas source, JCas target)
{
    boolean sameText = source.getDocumentText().equals(target.getDocumentText());
    if (!sameText) {
        throw new IllegalArgumentException("Source and target have different content");
    }

    // Paragraphs first, then tokens
    for (Paragraph original : JCasUtil.select(source, Paragraph.class)) {
        new Paragraph(target, original.getBegin(), original.getEnd()).addToIndexes();
    }

    for (Token original : JCasUtil.select(source, Token.class)) {
        new Token(target, original.getBegin(), original.getEnd()).addToIndexes();
    }
}
 
Example 3
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a one-token document whose token carries a {@code POS_NOUN} tag and compares the
 * output against the expected result via {@code writeAndAssertEquals}.
 */
@Test
public void testElevatedType() throws Exception {
    JCas document = JCasFactory.createJCas();

    DocumentMetaData metadata = DocumentMetaData.create(document);
    metadata.setDocumentId("doc");
    document.setDocumentText("John");

    // Add an elevated type which is not a direct subtype of Annotation. This type will not be
    // picked up by the schema analyzer but should still be serialized as the POS type, which
    // is in fact picked up.
    POS_NOUN elevatedPos = new POS_NOUN(document, 0, 4);
    elevatedPos.setPosValue("NN");
    elevatedPos.setCoarseValue("NOUN");
    elevatedPos.addToIndexes();

    Token token = new Token(document, 0, 4);
    token.setPos(elevatedPos);
    token.addToIndexes();

    Sentence sentence = new Sentence(document, 0, 4);
    sentence.addToIndexes();

    writeAndAssertEquals(document);
}
 
Example 4
Source File: StringMatchingRecommenderTest.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a single-element list holding a CAS with the given text (language "de") that
 * contains sentences, tokens and named entities at the given spans.
 *
 * @param aText the document text
 * @param aVals the named entity values; one entity is created per value
 * @param aNEIndices {@code [begin, end]} pairs for the named entities, parallel to
 *        {@code aVals}
 * @param aSentIndices {@code [begin, end]} pairs for the sentences
 * @param aTokenIndices {@code [begin, end]} pairs for the tokens
 * @return a mutable list containing the one created CAS
 */
private List<CAS> getTestNECas(String aText, String[] aVals, int[][] aNEIndices,
        int[][] aSentIndices, int[][] aTokenIndices)
    throws Exception
{
    JCas jcas = JCasFactory.createText(aText, "de");

    for (int[] sentenceSpan : aSentIndices) {
        new Sentence(jcas, sentenceSpan[0], sentenceSpan[1]).addToIndexes();
    }

    for (int[] tokenSpan : aTokenIndices) {
        new Token(jcas, tokenSpan[0], tokenSpan[1]).addToIndexes();
    }

    // Values and entity spans are parallel arrays, so walk them by position
    int position = 0;
    for (String value : aVals) {
        int[] entitySpan = aNEIndices[position];
        NamedEntity entity = new NamedEntity(jcas, entitySpan[0], entitySpan[1]);
        entity.setValue(value);
        entity.addToIndexes();
        position++;
    }

    List<CAS> result = new ArrayList<>();
    result.add(jcas.getCas());
    return result;
}
 
Example 5
Source File: WebannoTsv1Reader.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Create a {@link Token} in the {@link CAS} for every entry of {@code tokens}, visiting the
 * keys {@code 1..tokens.size()} in order. Each token text is located in {@code text} starting
 * at the end of the previously created token. If the lemma and pos columns are not empty
 * (i.e. not {@code "_"} and not missing) it will also create {@link Lemma} and {@link POS}
 * annotations over the token span and attach them to the token.
 *
 * @param aJCas the CAS to add the annotations to
 * @param text the text in which the token offsets are looked up
 * @param tokens token texts keyed by token number, starting at 1
 * @param pos POS values keyed by token number; {@code "_"} marks an empty value
 * @param lemma lemma values keyed by token number; {@code "_"} marks an empty value
 * @param tokensStored receives every created token under the key {@code "t_" + number}
 * @throws IllegalStateException if a token text does not occur in {@code text} at or after
 *         the end of the previous token
 */
private void createToken(JCas aJCas, StringBuilder text, Map<Integer, String> tokens,
        Map<Integer, String> pos, Map<Integer, String> lemma, Map<String, Token> tokensStored)
{
    int searchFrom = 0;

    for (int i = 1; i <= tokens.size(); i++) {
        String tokenText = tokens.get(i);

        // Locate the token once; the end offset follows from the begin offset
        int tokenBegin = text.indexOf(tokenText, searchFrom);
        if (tokenBegin < 0) {
            // Continuing with -1 would index a token with a negative begin offset
            throw new IllegalStateException("Token [" + tokenText + "] (number " + i
                    + ") not found in text at or after offset " + searchFrom);
        }
        int tokenEnd = tokenBegin + tokenText.length();
        searchFrom = tokenEnd;

        Token outToken = new Token(aJCas, tokenBegin, tokenEnd);
        outToken.addToIndexes();

        // Add pos to CAS if exist
        String posValue = pos.get(i);
        if (posValue != null && !"_".equals(posValue)) {
            POS outPos = new POS(aJCas, tokenBegin, tokenEnd);
            outPos.setPosValue(posValue);
            outPos.addToIndexes();
            outToken.setPos(outPos);
        }

        // Add lemma if exist
        String lemmaValue = lemma.get(i);
        if (lemmaValue != null && !"_".equals(lemmaValue)) {
            Lemma outLemma = new Lemma(aJCas, tokenBegin, tokenEnd);
            outLemma.setValue(lemmaValue);
            outLemma.addToIndexes();
            outToken.setLemma(outLemma);
        }

        tokensStored.put("t_" + i, outToken);
    }
}
 
Example 6
Source File: RemoveDanglingRelationsRepairTest.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a dependency whose dependent token is not in the indexes, runs the
 * {@link AllFeatureStructuresIndexedCheck} analysis followed by the
 * {@link RemoveDanglingRelationsRepair}, and expects the analysis to report failure.
 */
@Test
public void test()
    throws Exception
{
    JCas document = JCasFactory.createJCas();
    document.setDocumentText("This is a test.");

    Token indexedToken = new Token(document, 0, 4);
    indexedToken.addToIndexes();

    // This token is never added to the indexes
    Token unindexedToken = new Token(document, 6, 8);

    Dependency relation = new Dependency(document, 0, 8);
    relation.setGovernor(indexedToken);
    relation.setDependent(unindexedToken);
    relation.addToIndexes();

    List<LogMessage> log = new ArrayList<>();
    CasDoctor doctor = new CasDoctor(RemoveDanglingRelationsRepair.class,
            AllFeatureStructuresIndexedCheck.class);

    // A project is not required for this check
    boolean analysisPassed = doctor.analyze(null, document.getCas(), log);
    // A project is not required for this repair
    doctor.repair(null, document.getCas(), log);

    assertFalse(analysisPassed);

    log.forEach(System.out::println);
}
 
Example 7
Source File: CurationTestUtils.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Creates and indexes a {@link Token} over the target span, then creates and indexes a
 * feature structure of type {@code LINK_TYPE} pointing at that token.
 *
 * @param aCas the CAS to create the token and the link in
 * @param aSlotLabel the value stored in the link's "role" feature
 * @param aTargetBegin begin offset of the target token
 * @param aTargetEnd end offset of the target token
 * @return the indexed link feature structure
 */
public static FeatureStructure makeLinkFS(JCas aCas, String aSlotLabel, int aTargetBegin,
        int aTargetEnd)
{
    // The link target is a fresh token over the requested span
    Token target = new Token(aCas, aTargetBegin, aTargetEnd);
    target.addToIndexes();

    Type typeOfLink = aCas.getTypeSystem().getType(LINK_TYPE);
    FeatureStructure link = aCas.getCas().createFS(typeOfLink);
    link.setStringValue(typeOfLink.getFeatureByBaseName("role"), aSlotLabel);
    link.setFeatureValue(typeOfLink.getFeatureByBaseName("target"), target);
    aCas.getCas().addFsToIndexes(link);

    return link;
}
 
Example 8
Source File: DiffTestUtils.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Creates and indexes a {@link Token} over the target span, then creates and indexes a
 * feature structure of type {@code LINK_TYPE} pointing at that token.
 *
 * @param aCas the CAS to create the token and the link in
 * @param aSlotLabel the value stored in the link's "role" feature
 * @param aTargetBegin begin offset of the target token
 * @param aTargetEnd end offset of the target token
 * @return the indexed link feature structure
 */
public static FeatureStructure makeLinkFS(JCas aCas, String aSlotLabel, int aTargetBegin,
        int aTargetEnd)
{
    // The link target is a fresh token over the requested span
    Token target = new Token(aCas, aTargetBegin, aTargetEnd);
    target.addToIndexes();

    Type typeOfLink = aCas.getTypeSystem().getType(LINK_TYPE);
    FeatureStructure link = aCas.getCas().createFS(typeOfLink);
    link.setStringValue(typeOfLink.getFeatureByBaseName("role"), aSlotLabel);
    link.setFeatureValue(typeOfLink.getFeatureByBaseName("target"), target);
    aCas.getCas().addFsToIndexes(link);

    return link;
}
 
Example 9
Source File: AgreementTestUtils.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Creates and indexes a {@link Token} over the target span, then creates and indexes a
 * feature structure of type {@code LINK_TYPE} pointing at that token.
 *
 * @param aCas the CAS to create the token and the link in
 * @param aSlotLabel the value stored in the link's "role" feature
 * @param aTargetBegin begin offset of the target token
 * @param aTargetEnd end offset of the target token
 * @return the indexed link feature structure
 */
public static FeatureStructure makeLinkFS(JCas aCas, String aSlotLabel, int aTargetBegin,
        int aTargetEnd)
{
    // The link target is a fresh token over the requested span
    Token target = new Token(aCas, aTargetBegin, aTargetEnd);
    target.addToIndexes();

    Type typeOfLink = aCas.getTypeSystem().getType(LINK_TYPE);
    FeatureStructure link = aCas.getCas().createFS(typeOfLink);
    link.setStringValue(typeOfLink.getFeatureByBaseName("role"), aSlotLabel);
    link.setFeatureValue(typeOfLink.getFeatureByBaseName("target"), target);
    aCas.getCas().addFsToIndexes(link);

    return link;
}
 
Example 10
Source File: MtasUimaParserTest.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes a two-token document with one dependency relation through {@link MtasUimaParser}
 * and checks the exact sequence of "Dependency"-prefixed Mtas tokens that is produced.
 */
@Test
public void testDependencyRelation() throws Exception
{
    // Document "a b": two POS-tagged tokens linked by a dummy dependency relation
    jcas.setDocumentText("a b");

    Token governorToken = new Token(jcas, 0, 1);
    governorToken.addToIndexes();

    POS governorPos = new POS(jcas, governorToken.getBegin(), governorToken.getEnd());
    governorPos.setPosValue("A");
    governorToken.setPos(governorPos);
    governorPos.addToIndexes();

    Token dependentToken = new Token(jcas, 2, 3);
    dependentToken.addToIndexes();

    POS dependentPos = new POS(jcas, dependentToken.getBegin(), dependentToken.getEnd());
    dependentPos.setPosValue("B");
    dependentToken.setPos(dependentPos);
    dependentPos.addToIndexes();

    Dependency dependency = new Dependency(jcas, dependentToken.getBegin(),
            dependentToken.getEnd());
    dependency.setDependent(dependentToken);
    dependency.setGovernor(governorToken);
    dependency.addToIndexes();

    // Annotation schema: Token layer with its "pos" feature
    AnnotationLayer tokenLayer = new AnnotationLayer(Token.class.getName(), "Token",
            SPAN_TYPE, project, true, SINGLE_TOKEN, NO_OVERLAP);
    tokenLayer.setId(1L);
    AnnotationFeature tokenLayerPos = new AnnotationFeature(1L, tokenLayer, "pos",
            POS.class.getName());

    // Annotation schema: POS layer with its "PosValue" feature
    AnnotationLayer posLayer = new AnnotationLayer(POS.class.getName(), "POS",
            SPAN_TYPE, project, true, SINGLE_TOKEN, NO_OVERLAP);
    posLayer.setId(2L);
    AnnotationFeature posLayerValue = new AnnotationFeature(1L, posLayer, "PosValue",
            CAS.TYPE_NAME_STRING);

    // Annotation schema: Dependency layer attached to the token layer via "pos"
    AnnotationLayer depLayer = new AnnotationLayer(Dependency.class.getName(),
            "Dependency", RELATION_TYPE, project, true, SINGLE_TOKEN, NO_OVERLAP);
    depLayer.setId(3L);
    depLayer.setAttachType(tokenLayer);
    depLayer.setAttachFeature(tokenLayerPos);
    AnnotationFeature dependencyLayerGovernor = new AnnotationFeature(2L, depLayer,
            "Governor", Token.class.getName());
    AnnotationFeature dependencyLayerDependent = new AnnotationFeature(3L, depLayer,
            "Dependent", Token.class.getName());

    // Stub the schema service so the parser sees the three layers and their adapters
    when(annotationSchemaService.listAnnotationLayer(any(Project.class)))
            .thenReturn(asList(tokenLayer, posLayer, depLayer));

    when(annotationSchemaService.getAdapter(posLayer)).thenReturn(new SpanAdapter(
        layerSupportRegistry, featureSupportRegistry, null, posLayer,
        () -> asList(posLayerValue), null));

    when(annotationSchemaService.getAdapter(depLayer))
            .thenReturn(new RelationAdapter(
                layerSupportRegistry, featureSupportRegistry, null, depLayer,
                FEAT_REL_TARGET, FEAT_REL_SOURCE,
                () -> asList(dependencyLayerGovernor, dependencyLayerDependent),
                emptyList()));

    MtasUimaParser sut = new MtasUimaParser(
            asList(tokenLayerPos, posLayerValue, dependencyLayerGovernor,
                    dependencyLayerDependent),
            annotationSchemaService, featureIndexingSupportRegistry);
    MtasTokenCollection tokenCollection = sut.createTokenCollection(jcas.getCas());

    MtasUtils.print(tokenCollection);

    List<MtasToken> indexedTokens = new ArrayList<>();
    tokenCollection.iterator().forEachRemaining(indexedTokens::add);

    assertThat(indexedTokens)
        .filteredOn(token -> token.getPrefix().startsWith("Dependency"))
        .extracting(token -> token.getPrefix() + "=" + token.getPostfix())
        .containsExactly(
                "Dependency=b",
                "Dependency-source=a",
                "Dependency-source.PosValue=A",
                "Dependency-target=b",
                "Dependency-target.PosValue=B");
}
 
Example 11
Source File: NoMultipleIncomingRelationsCheckTest.java    From webanno with Apache License 2.0 4 votes vote down vote up
/**
 * Creates two dependencies that both have the token "is" as dependent and expects the check
 * to log exactly one message describing the second incoming relation.
 */
@Test
public void testFail() throws Exception
{
    // The schema service reports a single relation layer for Dependency
    AnnotationLayer dependencyLayer = new AnnotationLayer();
    dependencyLayer.setName(Dependency.class.getName());
    dependencyLayer.setType(WebAnnoConst.RELATION_TYPE);
    Mockito.when(annotationService.listAnnotationLayer(Mockito.isNull()))
            .thenReturn(Arrays.asList(dependencyLayer));

    JCas document = JCasFactory.createJCas();
    document.setDocumentText("This is a test.");

    Token tokenThis = new Token(document, 0, 4);
    tokenThis.addToIndexes();

    Token tokenIs = new Token(document, 5, 7);
    tokenIs.addToIndexes();

    Token tokenA = new Token(document, 8, 9);
    tokenA.addToIndexes();

    // Both relations use "is" as their dependent
    Dependency thisToIs = new Dependency(document, 0, 7);
    thisToIs.setGovernor(tokenThis);
    thisToIs.setDependent(tokenIs);
    thisToIs.addToIndexes();

    Dependency aToIs = new Dependency(document, 0, 9);
    aToIs.setGovernor(tokenA);
    aToIs.setDependent(tokenIs);
    aToIs.addToIndexes();

    List<LogMessage> log = new ArrayList<>();
    boolean outcome = check.check(null, document.getCas(), log);

    log.forEach(System.out::println);

    assertTrue(outcome);

    // also check the message itself
    assertEquals(1, log.size());
    assertEquals(
            "[NoMultipleIncomingRelationsCheck] Relation [This] -> [is] points to span that already has an incoming relation [a] -> [is].",
            log.get(0).toString());
}
 
Example 12
Source File: NoMultipleIncomingRelationsCheckTest.java    From webanno with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a chain of two dependencies ("This" -> "is" -> "a") in which every token is the
 * dependent of at most one relation, and expects the check to return {@code true}.
 */
@Test
public void testOK() throws Exception
{
    // The schema service reports a single relation layer for Dependency
    AnnotationLayer relationLayer = new AnnotationLayer();
    relationLayer.setName(Dependency.class.getName());

    relationLayer.setType(WebAnnoConst.RELATION_TYPE);
    Mockito.when(annotationService.listAnnotationLayer(Mockito.isNull()))
            .thenReturn(Arrays.asList(relationLayer));

    JCas jcas = JCasFactory.createJCas();

    jcas.setDocumentText("This is a test.");

    // Offsets cover exactly the words the variables are named after:
    // "This" = [0,4), "is" = [5,7), "a" = [8,9)
    Token spanThis = new Token(jcas, 0, 4);
    spanThis.addToIndexes();

    Token spanIs = new Token(jcas, 5, 7);
    spanIs.addToIndexes();

    Token spanA = new Token(jcas, 8, 9);
    spanA.addToIndexes();

    // Each dependency spans from the start of its governor to the end of its dependent
    Dependency dep1 = new Dependency(jcas, 0, 7);
    dep1.setGovernor(spanThis);
    dep1.setDependent(spanIs);
    dep1.addToIndexes();

    Dependency dep2 = new Dependency(jcas, 5, 9);
    dep2.setGovernor(spanIs);
    dep2.setDependent(spanA);
    dep2.addToIndexes();

    List<LogMessage> messages = new ArrayList<>();

    boolean result = check.check(null, jcas.getCas(), messages);

    messages.forEach(System.out::println);

    assertTrue(result);
}
 
Example 13
Source File: NoMultipleIncomingRelationsCheckTest.java    From webanno with Apache License 2.0 4 votes vote down vote up
/**
 * Creates two dependencies that share the dependent "is" while the only layer reported by
 * the schema service is a chain-type layer for {@link CoreferenceChain}, and expects the
 * check to return {@code true}.
 */
@Test
public void testOkBecauseCoref() throws Exception
{
    // The only configured layer is a chain layer, not a relation layer
    AnnotationLayer relationLayer = new AnnotationLayer();
    relationLayer.setName(CoreferenceChain.class.getName());

    relationLayer.setType(WebAnnoConst.CHAIN_TYPE);
    Mockito.when(annotationService.listAnnotationLayer(Mockito.isNull()))
            .thenReturn(Arrays.asList(relationLayer));

    JCas jcas = JCasFactory.createJCas();

    jcas.setDocumentText("This is a test.");

    // Offsets cover exactly the words the variables are named after:
    // "This" = [0,4), "is" = [5,7), "a" = [8,9)
    Token spanThis = new Token(jcas, 0, 4);
    spanThis.addToIndexes();

    Token spanIs = new Token(jcas, 5, 7);
    spanIs.addToIndexes();

    Token spanA = new Token(jcas, 8, 9);
    spanA.addToIndexes();

    // Both dependencies have "is" as their dependent
    Dependency dep1 = new Dependency(jcas, 0, 7);
    dep1.setGovernor(spanThis);
    dep1.setDependent(spanIs);
    dep1.addToIndexes();

    Dependency dep2 = new Dependency(jcas, 0, 9);
    dep2.setGovernor(spanA);
    dep2.setDependent(spanIs);
    dep2.addToIndexes();

    List<LogMessage> messages = new ArrayList<>();

    boolean result = check.check(null, jcas.getCas(), messages);

    messages.forEach(System.out::println);

    assertTrue(result);
}
 
Example 14
Source File: ConstraintsGeneratorTest.java    From webanno with Apache License 2.0 4 votes vote down vote up
/**
 * Parses the constraint rules in {@code src/test/resources/rules/10.rules}, builds a
 * two-token document ("The sun.") with POS tags and a "det" dependency, and expects the
 * generated possible values for the dependency's "DependencyType" feature to be exactly
 * {@code [det]}.
 */
@Test
public void testSimplePath()
    throws Exception
{
    // Close the rules file as soon as parsing is done instead of leaking the stream
    Parse p;
    try (FileInputStream rules = new FileInputStream("src/test/resources/rules/10.rules")) {
        ConstraintsGrammar parser = new ConstraintsGrammar(rules);
        p = parser.Parse();
    }

    ParsedConstraints constraints = p.accept(new ParserVisitor());

    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText("The sun.");

    // Add token annotations: "The" = [0,3), "sun" = [4,7)
    Token t_the = new Token(jcas, 0, 3);
    t_the.addToIndexes();
    Token t_sun = new Token(jcas, 4, 7);
    t_sun.addToIndexes();

    // Add POS annotations and link them to the tokens
    POS p_the = new POS(jcas, t_the.getBegin(), t_the.getEnd());
    p_the.setPosValue("DET");
    p_the.addToIndexes();
    t_the.setPos(p_the);
    POS p_sun = new POS(jcas, t_sun.getBegin(), t_sun.getEnd());
    p_sun.setPosValue("NN");
    p_sun.addToIndexes();
    t_sun.setPos(p_sun);

    // Add dependency annotations; the dependency takes the span of its governor
    Dependency dep_the_sun = new Dependency(jcas);
    dep_the_sun.setGovernor(t_sun);
    dep_the_sun.setDependent(t_the);
    dep_the_sun.setDependencyType("det");
    dep_the_sun.setBegin(dep_the_sun.getGovernor().getBegin());
    dep_the_sun.setEnd(dep_the_sun.getGovernor().getEnd());
    dep_the_sun.addToIndexes();

    Evaluator constraintsEvaluator = new ValuesGenerator();

    List<PossibleValue> possibleValues = constraintsEvaluator.generatePossibleValues(
            dep_the_sun, "DependencyType", constraints);

    List<PossibleValue> expectedOutput = new LinkedList<>();
    expectedOutput.add(new PossibleValue("det", false));

    assertEquals(expectedOutput, possibleValues);
}