de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token Java Examples

The following examples show how to use de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Tcf2DKPro.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Creates one {@link NamedEntity} annotation in the CAS for every entity of the named entities
 * layer of the given corpus. Does nothing when the corpus has no named entities layer.
 *
 * @param aJCas
 *            the JCas in which the annotations are created and indexed.
 * @param aCorpusData
 *            the corpus whose named entities layer is read.
 * @param aTokens
 *            token map handed to {@code getOffsets} to resolve the span of each entity.
 */
public void convertNamedEntities(JCas aJCas, TextCorpus aCorpusData,
        Map<String, Token> aTokens)
{
    if (aCorpusData.getNamedEntitiesLayer() == null) {
        // No layer to read from.
        return;
    }

    for (int i = 0; i < aCorpusData.getNamedEntitiesLayer().size(); i++) {
        // get the named entity
        eu.clarin.weblicht.wlfxb.tc.api.NamedEntity entity = aCorpusData
                .getNamedEntitiesLayer().getEntity(i);

        eu.clarin.weblicht.wlfxb.tc.api.Token[] namedEntityTokens = aCorpusData
                .getNamedEntitiesLayer().getTokens(entity);

        NamedEntity outNamedEntity = new NamedEntity(aJCas);

        // Resolve the span once and reuse it for both boundaries instead of recomputing it.
        int[] offsets = getOffsets(namedEntityTokens, aTokens);
        outNamedEntity.setBegin(offsets[0]);
        outNamedEntity.setEnd(offsets[1]);
        outNamedEntity.setValue(entity.getType());
        outNamedEntity.addToIndexes();
    }
}
 
Example #2
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a chain made of two links, the first spanning tokens 1-2 and the second spanning
 * tokens 3-4 of the one-sentence test document.
 */
@Test
public void testMultiTokenChain() throws Exception
{
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();

    List<Token> allTokens = new ArrayList<>(select(jcas, Token.class));
    Token first = allTokens.get(0);
    Token second = allTokens.get(1);
    Token third = allTokens.get(2);
    Token fourth = allTokens.get(3);

    Type chainType = cas.getTypeSystem().getType("webanno.custom.SimpleChain");
    Type linkType = cas.getTypeSystem().getType("webanno.custom.SimpleLink");

    // The last argument of each link is its successor; the final link has none.
    makeChainHead(chainType,
            makeChainLink(linkType, cas, first.getBegin(), second.getEnd(), null, null,
                    makeChainLink(linkType, cas, third.getBegin(), fourth.getEnd(), null, null,
                            null)));

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_CHAIN_LAYERS, asList("webanno.custom.Simple"));
}
 
Example #3
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a chain of three single-token links covering the first three tokens of the
 * one-sentence test document.
 */
@Test
public void testSimpleChain() throws Exception
{
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();

    List<Token> allTokens = new ArrayList<>(select(jcas, Token.class));
    Token first = allTokens.get(0);
    Token second = allTokens.get(1);
    Token third = allTokens.get(2);

    Type chainType = cas.getTypeSystem().getType("webanno.custom.SimpleChain");
    Type linkType = cas.getTypeSystem().getType("webanno.custom.SimpleLink");

    // The last argument of each link is its successor; the final link has none.
    makeChainHead(chainType,
            makeChainLink(linkType, cas, first.getBegin(), first.getEnd(), null, null,
                    makeChainLink(linkType, cas, second.getBegin(), second.getEnd(), null, null,
                            makeChainLink(linkType, cas, third.getBegin(), third.getEnd(), null,
                                    null, null))));

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_CHAIN_LAYERS, asList("webanno.custom.Simple"));
}
 
Example #4
Source File: SpanAdapterTest.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Two named entities that merely touch at offset 4 must not be reported by validation when
 * the layer is in no-overlap mode.
 */
@Test
public void thatAdjacentAnnotationsDoNotOverlap() throws AnnotationException
{
    jcas.setDocumentText("Test.");

    Sentence sentence = new Sentence(jcas, 0, 5);
    sentence.addToIndexes();

    Token word = new Token(jcas, 0, 4);
    word.addToIndexes();
    Token period = new Token(jcas, 4, 5);
    period.addToIndexes();

    NamedEntity onWord = new NamedEntity(jcas, 0, 4);
    onWord.addToIndexes();
    NamedEntity onPeriod = new NamedEntity(jcas, 4, 5);
    onPeriod.addToIndexes();

    // The adapter is created before the overlap mode is switched, as in the original test.
    SpanAdapter adapter = new SpanAdapter(layerSupportRegistry, featureSupportRegistry, null,
            neLayer, () -> asList(), behaviors);

    neLayer.setOverlapMode(NO_OVERLAP);

    assertThat(adapter.validate(jcas.getCas())).isEmpty();
}
 
Example #5
Source File: Step0bTextSegmenterA.java    From argument-reasoning-comprehension-task with Apache License 2.0 6 votes vote down vote up
/**
 * Re-creates every {@link Paragraph} and {@link Token} of {@code source} in {@code target},
 * copying only the begin and end offsets.
 *
 * @param source
 *            the JCas the annotations are read from.
 * @param target
 *            the JCas the copies are created and indexed in.
 * @throws IllegalArgumentException
 *             if the document texts of the two JCases are not equal.
 */
private static void copyParagraphAndTokenAnnotations(JCas source, JCas target)
{
    boolean sameText = source.getDocumentText().equals(target.getDocumentText());
    if (!sameText) {
        throw new IllegalArgumentException("Source and target have different content");
    }

    for (Paragraph original : JCasUtil.select(source, Paragraph.class)) {
        new Paragraph(target, original.getBegin(), original.getEnd()).addToIndexes();
    }

    for (Token original : JCasUtil.select(source, Token.class)) {
        new Token(target, original.getBegin(), original.getEnd()).addToIndexes();
    }
}
 
Example #6
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * The document starts with a blank and the named entity begins on that blank, one character
 * before the first token.
 */
@Test
public void testAnnotationWithLeadingWhitespaceAtStart() throws Exception
{
    JCas doc = JCasFactory.createJCas();
    DocumentMetaData.create(doc).setDocumentId("doc");
    doc.setDocumentText(" one two");

    Token one = new Token(doc, 1, 4);
    one.addToIndexes();
    Token two = new Token(doc, 5, 8);
    two.addToIndexes();
    Sentence sentence = new Sentence(doc, 1, 8);
    sentence.addToIndexes();

    // NE has leading whitespace - on export this should be silently dropped
    NamedEntity entity = new NamedEntity(doc, 0, 4);
    entity.addToIndexes();

    writeAndAssertEquals(doc);
}
 
Example #7
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * The two tokens are separated by two blanks and the named entity begins on the second blank,
 * one character before the second token.
 */
@Test
public void testAnnotationWithLeadingWhitespace() throws Exception
{
    JCas doc = JCasFactory.createJCas();
    DocumentMetaData.create(doc).setDocumentId("doc");
    doc.setDocumentText("one  two");

    Token one = new Token(doc, 0, 3);
    one.addToIndexes();
    Token two = new Token(doc, 5, 8);
    two.addToIndexes();
    Sentence sentence = new Sentence(doc, 0, 8);
    sentence.addToIndexes();

    // NE has leading whitespace - on export this should be silently dropped
    NamedEntity entity = new NamedEntity(doc, 4, 8);
    entity.addToIndexes();

    writeAndAssertEquals(doc);
}
 
Example #8
Source File: DiffTestUtils.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a type system consisting of a link type (features {@code role} and {@code target}),
 * a host annotation type holding an array of such links, and the type system returned by
 * {@code TypeSystemDescriptionFactory.createTypeSystemDescription()}.
 *
 * @return the merged type system description.
 */
public static TypeSystemDescription createMultiLinkWithRoleTestTypeSytem()
    throws Exception
{
    TypeSystemDescription customTypes = new TypeSystemDescription_impl();

    // Link type: a role label plus the token the link points to
    TypeDescription linkType = customTypes.addType(LINK_TYPE, "", CAS.TYPE_NAME_TOP);
    linkType.addFeature("role", "", CAS.TYPE_NAME_STRING);
    linkType.addFeature("target", "", Token.class.getName());

    // Link host: an annotation carrying an array of links
    TypeDescription hostType = customTypes.addType(HOST_TYPE, "", CAS.TYPE_NAME_ANNOTATION);
    hostType.addFeature("links", "", CAS.TYPE_NAME_FS_ARRAY, linkType.getName(), false);

    List<TypeSystemDescription> toMerge = new ArrayList<>();
    toMerge.add(customTypes);
    toMerge.add(TypeSystemDescriptionFactory.createTypeSystemDescription());

    return CasCreationUtils.mergeTypeSystems(toMerge);
}
 
Example #9
Source File: LemmaLayerInitializer.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the lemma layer for the given project: a {@code lemma} feature on the token layer,
 * the lemma layer attached to the token layer via that feature, and a string-valued
 * {@code value} feature on the lemma layer.
 */
@Override
public void configure(Project aProject) throws IOException
{
    String tokenTypeName = Token.class.getName();
    AnnotationLayer tokens = annotationSchemaService.findLayer(aProject, tokenTypeName);

    // Feature on the token layer that points to the lemma annotation
    AnnotationFeature lemmaOnToken = new AnnotationFeature(aProject, tokens, "lemma", "lemma",
            Lemma.class.getName());
    annotationSchemaService.createFeature(lemmaOnToken);

    // The lemma layer itself, attached to the token layer through the feature above
    AnnotationLayer lemmas = new AnnotationLayer(Lemma.class.getName(), "Lemma", SPAN_TYPE,
            aProject, true, SINGLE_TOKEN, NO_OVERLAP);
    lemmas.setAttachType(tokens);
    lemmas.setAttachFeature(lemmaOnToken);
    annotationSchemaService.createLayer(lemmas);

    // The feature holding the actual lemma string
    AnnotationFeature lemmaValue = new AnnotationFeature();
    lemmaValue.setDescription("lemma Annotation");
    lemmaValue.setName("value");
    lemmaValue.setType(CAS.TYPE_NAME_STRING);
    lemmaValue.setProject(aProject);
    lemmaValue.setUiName("Lemma");
    lemmaValue.setLayer(lemmas);
    annotationSchemaService.createFeature(lemmaValue);
}
 
Example #10
Source File: OOVFilter.java    From argument-reasoning-comprehension-task with Apache License 2.0 6 votes vote down vote up
/**
 * Counts the tokens whose covered text is not contained in {@code vocabulary}, records that
 * count in {@code frequency} and keeps the argument when the count does not exceed
 * {@code THRESHOLD}.
 */
@Override
boolean keepArgument(JCas jCas)
{
    int unknownCount = 0;
    for (Token candidate : JCasUtil.select(jCas, Token.class)) {
        boolean known = vocabulary.contains(candidate.getCoveredText());
        if (!known) {
            unknownCount += 1;
        }
    }

    frequency.addValue(unknownCount);

    return unknownCount <= THRESHOLD;
}
 
Example #11
Source File: CasDiffTest.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Diffing a single CAS with empty document text yields no configuration sets at all.
 */
@Test
public void singleEmptyCasTest()
    throws Exception
{
    CAS emptyCas = JCasFactory.createJCas().getCas();
    emptyCas.setDocumentText("");

    Map<String, List<CAS>> casesPerUser = new LinkedHashMap<>();
    casesPerUser.put("user1", asList(emptyCas));

    SpanDiffAdapter tokenAdapter = new SpanDiffAdapter(Token.class.getName());
    List<SpanDiffAdapter> adapters = asList(tokenAdapter);

    DiffResult diff = doDiff(adapters, LINK_TARGET_AS_LABEL, casesPerUser).toResult();

    assertEquals(0, diff.size());
    assertEquals(0, diff.getDifferingConfigurationSets().size());
}
 
Example #12
Source File: TRExReader.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Obtains a constituent via {@code getInstancedConstitient}, copies boundaries and URI from
 * the given annotation, attaches the tokens covered by its span, adds it to the indexes and
 * returns it.
 *
 * @param constituent the source annotation providing boundaries and URI.
 * @param clazz the concrete constituent class to return.
 * @param jCas the JCas to work on.
 * @return the populated constituent, cast to {@code clazz}.
 */
private <K extends Constituent> K getConstituent(TAnnotation constituent, Class<K> clazz, JCas jCas) {
  Constituent target = getInstancedConstitient(jCas, constituent, clazz);

  // Offsets are only marked explicit and set when the source provides boundaries.
  if (constituent.boundaries != null) {
    target.setExplicit(true);
    target.setBegin(constituent.boundaries[0]);
    target.setEnd(constituent.boundaries[1]);
  }
  target.setUri(constituent.uri);

  List<Token> covered =
      JCasUtil.selectCovered(jCas, Token.class, target.getBegin(), target.getEnd());
  FSArray tokenArray = new FSArray(jCas, covered.size());
  int index = 0;
  for (Token token : covered) {
    tokenArray.set(index, token);
    index++;
  }
  tokenArray.addToIndexes();
  target.setTokens(tokenArray);

  K typed = clazz.cast(target);
  jCas.addFsToIndexes(typed);
  return typed;
}
 
Example #13
Source File: LappsGridRecommenderConformityTest.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * After running prediction against the service, the CAS must contain at least one token and
 * at least one POS tag. Both checks are collected before failing.
 */
@Test
@Parameters(method = "getPosServices")
public void testPosConformity(LappsGridService aService) throws Exception
{
    CAS cas = loadData();
    predict(aService.getUrl(), cas);

    SoftAssertions checks = new SoftAssertions();

    checks.assertThat(JCasUtil.select(cas.getJCas(), Token.class))
            .as("Prediction should contain Tokens")
            .isNotEmpty();

    checks.assertThat(JCasUtil.select(cas.getJCas(), POS.class))
            .as("Prediction should contain POS tags")
            .isNotEmpty();

    checks.assertAll();
}
 
Example #14
Source File: PredictionsWriter.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Determines the label for a token from the first annotation of type {@code clazz} covering
 * it. Returns {@code "OTH"} when no such annotation exists. Otherwise the label is read from
 * {@code NamedEntity.getValue()} (when {@code clazz} is {@code NamedEntity}) or from
 * {@code TextClassificationOutcome.getOutcome()}, and then mapped to the short label names.
 *
 * @param clazz the annotation type to look up.
 * @param jCas the JCas to search in.
 * @param token the token for which the label is determined.
 * @return the (normalized) label.
 */
private <T extends Annotation> String getTag(Class<T> clazz, JCas jCas, Token token) {
    List<T> covering = JCasUtil.selectCovering(jCas, clazz, token);
    if (covering.isEmpty()) {
        return "OTH";
    }

    T firstCovering = covering.get(0);
    String label;
    if (clazz == NamedEntity.class) {
        label = ((NamedEntity) firstCovering).getValue();
    }
    else {
        label = ((TextClassificationOutcome) firstCovering).getOutcome();
    }

    if ("OTH".equals(label)) {
        return label;
    }

    // Spanish uses different signs for class labels...
    if ("es".equals(language)) {
        label = label.replace("LUG", "LOC");
        label = label.replace("OTROS", "MISC");
        label = label.replace("PERS", "PER");
    }

    label = label.replace("ORGANIZATION", "ORG");
    label = label.replace("LOCATION", "LOC");
    label = label.replace("PERSON", "PER");

    // TODO add chunk tags (B-/I- prefixes) if they are not predicted; rethink about Spanish.
    return label;
}
 
Example #15
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * The document ends with a blank and the named entity extends over that blank, one character
 * past the last token.
 */
@Test
public void testAnnotationWithTrailingWhitespaceAtEnd() throws Exception
{
    JCas doc = JCasFactory.createJCas();
    DocumentMetaData.create(doc).setDocumentId("doc");
    doc.setDocumentText("one two ");

    Token one = new Token(doc, 0, 3);
    one.addToIndexes();
    Token two = new Token(doc, 4, 7);
    two.addToIndexes();
    Sentence sentence = new Sentence(doc, 0, 7);
    sentence.addToIndexes();

    // NE has trailing whitespace - on export this should be silently dropped
    NamedEntity entity = new NamedEntity(doc, 4, 8);
    entity.addToIndexes();

    writeAndAssertEquals(doc);
}
 
Example #16
Source File: Tcf2DKPro.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Creates one {@link Sentence} annotation per sentence of the corpus' sentences layer. The
 * sentence starts at the begin of its first token and ends at the end of its last token.
 * Does nothing when the corpus has no sentences layer.
 *
 * @param aJCas
 *            the JCas in which the sentences are created and indexed.
 * @param aCorpusData
 *            the corpus whose sentences layer is read.
 * @param aTokens
 *            tokens looked up by the ID of the corpus tokens.
 */
public void convertSentences(JCas aJCas, TextCorpus aCorpusData,
        Map<String, Token> aTokens)
{
    if (aCorpusData.getSentencesLayer() == null) {
        // No layer to read from.
        return;
    }

    int sentenceCount = aCorpusData.getSentencesLayer().size();
    for (int index = 0; index < sentenceCount; index++) {
        eu.clarin.weblicht.wlfxb.tc.api.Token[] members = aCorpusData.getSentencesLayer()
                .getTokens(aCorpusData.getSentencesLayer().getSentence(index));

        Sentence sentence = new Sentence(aJCas);

        Token firstToken = aTokens.get(members[0].getID());
        sentence.setBegin(firstToken.getBegin());

        Token lastToken = aTokens.get(members[members.length - 1].getID());
        sentence.setEnd(lastToken.getEnd());

        sentence.addToIndexes();
    }
}
 
Example #17
Source File: Tcf2DKPro.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Creates one {@link SofaChangeAnnotation} per correction of the corpus' orthography layer.
 * The annotation spans the first token of the correction and carries the correction string
 * and the name of its operation ({@code null} when the correction has no operation). Does
 * nothing when the corpus has no orthography layer.
 *
 * @param aJCas
 *            the JCas in which the annotations are created and indexed.
 * @param aCorpusData
 *            the corpus whose orthography layer is read.
 * @param aTokens
 *            tokens looked up by the ID of the corpus tokens.
 */
public void convertOrthoGraphy(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens)
{
    if (aCorpusData.getOrthographyLayer() == null) {
        return;
    }

    int correctionCount = aCorpusData.getOrthographyLayer().size();
    for (int index = 0; index < correctionCount; index++) {
        eu.clarin.weblicht.wlfxb.tc.api.Token[] members = aCorpusData.getOrthographyLayer()
                .getTokens(aCorpusData.getOrthographyLayer().getCorrection(index));
        String correctedText = aCorpusData.getOrthographyLayer().getCorrection(index)
                .getString();
        String operationName = Optional
                .ofNullable(
                        aCorpusData.getOrthographyLayer().getCorrection(index).getOperation())
                .map(CorrectionOperation::name)
                .orElse(null);

        SofaChangeAnnotation change = new SofaChangeAnnotation(aJCas);

        // Only the first token of the correction determines the span.
        Token anchor = aTokens.get(members[0].getID());
        change.setBegin(anchor.getBegin());
        change.setEnd(anchor.getEnd());
        change.setValue(correctedText);
        change.setOperation(operationName);
        change.addToIndexes();
    }
}
 
Example #18
Source File: Tcf2DKPro.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Creates one {@link Lemma} annotation per lemma of the corpus' lemmas layer, spanning the
 * first token of the lemma, and links it to that token. Does nothing when the corpus has no
 * lemmas layer.
 *
 * @param aJCas
 *            the JCas in which the lemmas are created and indexed.
 * @param aCorpusData
 *            the corpus whose lemmas layer is read.
 * @param aTokens
 *            tokens looked up by the ID of the corpus tokens.
 */
public void convertLemma(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens)
{
    if (aCorpusData.getLemmasLayer() == null) {
        return;
    }

    int lemmaCount = aCorpusData.getLemmasLayer().size();
    for (int index = 0; index < lemmaCount; index++) {
        eu.clarin.weblicht.wlfxb.tc.api.Token[] members = aCorpusData.getLemmasLayer()
                .getTokens(aCorpusData.getLemmasLayer().getLemma(index));
        String lemmaText = aCorpusData.getLemmasLayer().getLemma(index).getString();

        Lemma lemma = new Lemma(aJCas);

        // Only the first token of the lemma determines the span.
        Token anchor = aTokens.get(members[0].getID());
        lemma.setBegin(anchor.getBegin());
        lemma.setEnd(anchor.getEnd());
        lemma.setValue(lemmaText);
        lemma.addToIndexes();

        // Set the lemma to the token
        anchor.setLemma(lemma);
    }
}
 
Example #19
Source File: Tcf2DKPro.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Creates one {@link POS} annotation per tag of the corpus' POS tags layer, spanning the first
 * token of the tag, and links it to that token. Does nothing when the corpus has no POS tags
 * layer.
 *
 * @param aJCas
 *            the JCas in which the POS annotations are created and indexed.
 * @param aCorpusData
 *            the corpus whose POS tags layer is read.
 * @param aTokens
 *            tokens looked up by the ID of the corpus tokens.
 */
public void convertPos(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens)
{
    if (aCorpusData.getPosTagsLayer() == null) {
        return;
    }

    int tagCount = aCorpusData.getPosTagsLayer().size();
    for (int index = 0; index < tagCount; index++) {
        eu.clarin.weblicht.wlfxb.tc.api.Token[] members = aCorpusData.getPosTagsLayer()
                .getTokens(aCorpusData.getPosTagsLayer().getTag(index));
        String tagText = aCorpusData.getPosTagsLayer().getTag(index).getString();

        POS pos = new POS(aJCas);

        // Only the first token of the tag determines the span.
        Token anchor = aTokens.get(members[0].getID());
        pos.setBegin(anchor.getBegin());
        pos.setEnd(anchor.getEnd());
        pos.setPosValue(tagText);
        POSUtils.assignCoarseValue(pos);
        pos.addToIndexes();

        // Set the POS to the token
        anchor.setPos(pos);
    }
}
 
Example #20
Source File: HtmlAnnotationEditor.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the layers of the current annotator state that should be rendered: enabled layers
 * that are neither the token nor the sentence layer, and that are not chain layers while the
 * state is in automation, correction or curation mode.
 *
 * @return the layers to render, in the order provided by the state.
 */
private List<AnnotationLayer> getLayersToRender()
{
    AnnotatorState state = getModelObject();

    List<AnnotationLayer> selected = new ArrayList<>();
    for (AnnotationLayer candidate : state.getAnnotationLayers()) {
        boolean segmentation = candidate.getName().equals(Token.class.getName())
                || candidate.getName().equals(Sentence.class.getName());

        // Chain layers are not supported in these three modes.
        boolean unsupported = candidate.getType().equals(CHAIN_TYPE)
                && (state.getMode().equals(Mode.AUTOMATION)
                        || state.getMode().equals(Mode.CORRECTION)
                        || state.getMode().equals(Mode.CURATION));

        if (!candidate.isEnabled() || segmentation || unsupported) {
            continue;
        }
        selected.add(candidate);
    }
    return selected;
}
 
Example #21
Source File: SpanAdapterTest.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * With cross-sentence spans disabled on the layer, adding a span over the whole two-sentence
 * document must fail with a {@code MultipleSentenceCoveredException}.
 */
@Test
public void thatSpanCrossSentenceBehaviorOnCreateThrowsException()
{
    neLayer.setCrossSentence(false);

    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class,
            Sentence.class);
    tokenBuilder.buildTokens(jcas, "This is a test .\nThis is sentence two .");

    SpanAdapter adapter = new SpanAdapter(layerSupportRegistry, featureSupportRegistry, null,
            neLayer, () -> asList(), behaviors);

    assertThatExceptionOfType(MultipleSentenceCoveredException.class)
            .isThrownBy(() -> adapter.add(document, username, jcas.getCas(), 0,
                    jcas.getDocumentText().length()))
            .withMessageContaining("covers multiple sentences");
}
 
Example #22
Source File: SuggestionViewPanel.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Renders the given CAS to a JSON string: pre-renders the visible window for all enabled
 * layers except the token, sentence and chain layers, then runs the brat renderer on the
 * result.
 *
 * @param aCas
 *            the CAS to render.
 * @param aBratAnnotatorModel
 *            the annotator state providing the layers and the window offsets.
 * @param aCurationColoringStrategy
 *            the coloring strategy passed to the brat renderer.
 * @return the JSON representation of the rendered document response.
 */
private String render(CAS aCas, AnnotatorState aBratAnnotatorModel,
        ColoringStrategy aCurationColoringStrategy)
    throws IOException
{
    List<AnnotationLayer> selected = new ArrayList<>();
    for (AnnotationLayer candidate : aBratAnnotatorModel.getAnnotationLayers()) {
        boolean segmentation = candidate.getName().equals(Token.class.getName())
                || candidate.getName().equals(Sentence.class.getName());
        boolean chain = candidate.getType().equals(CHAIN_TYPE);

        if (!candidate.isEnabled() || segmentation || chain) {
            continue;
        }
        selected.add(candidate);
    }

    VDocument visualDocument = new VDocument();
    preRenderer.render(visualDocument, aBratAnnotatorModel.getWindowBeginOffset(),
            aBratAnnotatorModel.getWindowEndOffset(), aCas, selected);

    GetDocumentResponse documentResponse = new GetDocumentResponse();
    BratRenderer bratRenderer = new BratRenderer(schemaService, coloringService);
    bratRenderer.render(documentResponse, aBratAnnotatorModel, visualDocument, aCas,
            aCurationColoringStrategy);

    return JSONUtil.toInterpretableJsonString(documentResponse);
}
 
Example #23
Source File: WebannoTsv3Writer.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Registers one {@code AnnotationUnit} per token and assigns it the address
 * {@code <sentence number>-<token number>}, both counted from 1. The unit of the first token
 * of each sentence is additionally mapped to the sentence text.
 *
 * @param aJCas
 *            the JCas whose sentences and tokens are addressed.
 */
private void setTokenSentenceAddress(JCas aJCas)
{
    int sentenceIndex = 0;
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        sentenceIndex++;

        int tokenIndex = 0;
        for (Token token : selectCovered(Token.class, sentence)) {
            tokenIndex++;

            AnnotationUnit tokenUnit = new AnnotationUnit(token.getBegin(), token.getEnd(),
                    false, token.getCoveredText());
            units.add(tokenUnit);

            boolean firstInSentence = tokenIndex == 1;
            if (firstInSentence) {
                sentenceUnits.put(tokenUnit, sentence.getCoveredText());
            }

            unitsLineNumber.put(tokenUnit, sentenceIndex + "-" + tokenIndex);
        }
    }
}
 
Example #24
Source File: ArgumentPrinterUtils.java    From argument-reasoning-comprehension-task with Apache License 2.0 6 votes vote down vote up
/**
 * Checks whether an argument component ends at the given token. Claims, backings, premises,
 * rebuttals and refutations covering the token are collected in that order; only the first
 * one found is examined.
 *
 * @param t    token
 * @param jCas jcas
 * @return {@code true} if a covering component exists and the first one ends where the token
 *         ends, {@code false} otherwise
 */
public static boolean argAnnotationEnds(Token t, JCas jCas)
{
    int tokenBegin = t.getBegin();
    int tokenEnd = t.getEnd();

    List<ArgumentComponent> covering = new ArrayList<>();
    covering.addAll(JCasUtil.selectCovering(jCas, Claim.class, tokenBegin, tokenEnd));
    covering.addAll(JCasUtil.selectCovering(jCas, Backing.class, tokenBegin, tokenEnd));
    covering.addAll(JCasUtil.selectCovering(jCas, Premise.class, tokenBegin, tokenEnd));
    covering.addAll(JCasUtil.selectCovering(jCas, Rebuttal.class, tokenBegin, tokenEnd));
    covering.addAll(JCasUtil.selectCovering(jCas, Refutation.class, tokenBegin, tokenEnd));

    if (covering.isEmpty()) {
        return false;
    }
    return covering.get(0).getEnd() == t.getEnd();
}
 
Example #25
Source File: ArgumentPrinterUtils.java    From argument-reasoning-comprehension-task with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the argument component that begins at the given token, if any. Claims, backings,
 * premises, rebuttals and refutations covering the token are collected in that order; only the
 * first one found is examined.
 *
 * @param t    token
 * @param jCas jcas
 * @return the first covering component if it begins where the token begins, {@code null}
 *         otherwise
 */
public static ArgumentComponent argAnnotationBegins(Token t, JCas jCas)
{
    int tokenBegin = t.getBegin();
    int tokenEnd = t.getEnd();

    List<ArgumentComponent> covering = new ArrayList<>();
    covering.addAll(JCasUtil.selectCovering(jCas, Claim.class, tokenBegin, tokenEnd));
    covering.addAll(JCasUtil.selectCovering(jCas, Backing.class, tokenBegin, tokenEnd));
    covering.addAll(JCasUtil.selectCovering(jCas, Premise.class, tokenBegin, tokenEnd));
    covering.addAll(JCasUtil.selectCovering(jCas, Rebuttal.class, tokenBegin, tokenEnd));
    covering.addAll(JCasUtil.selectCovering(jCas, Refutation.class, tokenBegin, tokenEnd));

    if (covering.isEmpty()) {
        return null;
    }

    ArgumentComponent first = covering.get(0);
    return first.getBegin() == t.getBegin() ? first : null;
}
 
Example #26
Source File: WebAnnoTsv3WriterTestBase.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * The document ends with two blanks and a zero-width named entity sits at offset 8, after the
 * end of the last token.
 */
@Test
public void testZeroWidthAnnotationBeyondLastTokenIsMovedToEndOfLastToken() throws Exception
{
    JCas doc = JCasFactory.createJCas();
    DocumentMetaData.create(doc).setDocumentId("doc");
    doc.setDocumentText("one two  ");

    Token one = new Token(doc, 0, 3);
    one.addToIndexes();
    Token two = new Token(doc, 4, 7);
    two.addToIndexes();
    Sentence sentence = new Sentence(doc, 0, 7);
    sentence.addToIndexes();

    // NE is after the end of the last token and should be moved to the end of the last token
    // otherwise it could not be represented in the TSV3 format.
    NamedEntity entity = new NamedEntity(doc, 8, 8);
    entity.addToIndexes();

    writeAndAssertEquals(doc);
}
 
Example #27
Source File: WebannoTsv3Writer.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code ambigUnits} for every span layer and for the token layer. For each annotation
 * of a layer:
 * <ul>
 * <li>if it covers multiple tokens, all of its sub-units are flagged {@code true};</li>
 * <li>otherwise, if its first unit has been seen before for this layer (stacked annotation),
 * that unit is flagged {@code true};</li>
 * <li>otherwise its first unit is recorded with {@code false}.</li>
 * </ul>
 *
 * @param aJCas
 *            the JCas whose annotations are inspected.
 */
private void setAmbiguity(JCas aJCas)
{
    // Work on a copy: assigning the field directly would alias it, so adding the token type
    // below would grow spanLayers itself on every call.
    List<String> spanAndTokenLayers = new java.util.ArrayList<>(spanLayers);
    spanAndTokenLayers.add(Token.class.getName());
    for (String l : spanAndTokenLayers) {
        Type type = getType(aJCas.getCas(), l);
        ambigUnits.putIfAbsent(type.getName(), new HashMap<>());
        for (AnnotationFS fs : CasUtil.select(aJCas.getCas(), type)) {
            AnnotationUnit unit = getFirstUnit(fs);
            // multiple token anno
            if (isMultipleTokenAnnotation(fs.getBegin(), fs.getEnd())) {
                SubTokenAnno sta = new SubTokenAnno();
                sta.setBegin(fs.getBegin());
                sta.setEnd(fs.getEnd());
                sta.setText(fs.getCoveredText());
                Set<AnnotationUnit> sus = new LinkedHashSet<>();
                for (AnnotationUnit newUnit : getSubUnits(sta, sus)) {
                    ambigUnits.get(type.getName()).put(newUnit, true);
                }
            }
            // stacked anno
            else if (ambigUnits.get(type.getName()).get(unit) != null) {
                ambigUnits.get(type.getName()).put(unit, true);
            }
            // single or first occurrence of stacked anno
            else {
                ambigUnits.get(type.getName()).put(unit, false);
            }
        }
    }
}
 
Example #28
Source File: OpenNlpPosRecommender.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts POS training samples from the given CASes, one candidate sample per sentence.
 * Extraction stops across all CASes as soon as the number of collected samples reaches
 * {@code traits.getTrainingSetSizeLimit()}.
 *
 * @param aCasses
 *            the CASes to extract samples from.
 * @return the collected samples.
 */
private List<POSSample> extractPosSamples(List<CAS> aCasses)
{
    List<POSSample> posSamples = new ArrayList<>();
    
    casses: for (CAS cas : aCasses) {
        Type sentenceType = getType(cas, Sentence.class);
        Type tokenType = getType(cas, Token.class);

        Map<AnnotationFS, List<AnnotationFS>> sentences = indexCovered(cas, sentenceType,
                tokenType);
        for (Map.Entry<AnnotationFS, List<AnnotationFS>> e : sentences.entrySet()) {
            // The limit applies to the total over all CASes, hence the labeled break.
            if (posSamples.size() >= traits.getTrainingSetSizeLimit()) {
                break casses;
            }
            
            AnnotationFS sentence = e.getKey();

            Collection<AnnotationFS> tokens = e.getValue();
            
            // A sample may be absent; add it only when present. ifPresent expresses the
            // side effect directly instead of misusing map() and discarding its result.
            createPosSample(cas, sentence, tokens).ifPresent(posSamples::add);
        }
    }
    
    LOG.debug("Extracted {} POS samples", posSamples.size());
    
    return posSamples;
}
 
Example #29
Source File: Tcf2DKPro.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the span covered by a set of tokens: the smallest begin offset and the largest end
 * offset among them. The token IDs need not be in document order.
 * 
 * @param aSpanTokens
 *            IDs of the tokens making up the span, e.g. [t_3, t_5, t_1]
 * @param aAllTokens
 *            all available tokens in the file, looked up by token ID
 * @return a two-element array holding the begin offset at index 0 and the end offset at
 *         index 1.
 */
public int[] getOffsets(String[] aSpanTokens, Map<String, Token> aAllTokens)
{
    List<Integer> begins = new ArrayList<>();
    List<Integer> ends = new ArrayList<>();

    for (String tokenId : aSpanTokens) {
        Token member = aAllTokens.get(tokenId);
        begins.add(member.getBegin());
        ends.add(member.getEnd());
    }

    int spanBegin = Collections.min(begins);
    int spanEnd = Collections.max(ends);
    return new int[] { spanBegin, spanEnd };
}
 
Example #30
Source File: ArgumentPrinterUtils.java    From argument-reasoning-comprehension-task with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the sentences covering the token and returns the first one if it begins exactly
 * where the token begins.
 *
 * @param t token
 * @return the first covering sentence when it starts at the token, {@code null} otherwise
 */
public static Sentence sentenceStartsOnToken(Token t)
{
    List<Sentence> covering = JCasUtil.selectCovering(Sentence.class, t);
    if (covering.isEmpty()) {
        return null;
    }

    Sentence first = covering.get(0);
    if (first.getBegin() == t.getBegin()) {
        return first;
    }
    return null;
}