Java Code Examples for de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence#addToIndexes()

The following examples show how to use de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence#addToIndexes(). Each example is taken from an open-source project; the source file, project, and license are noted above the code.
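At its core, every example below follows the same pattern: construct a Sentence annotation on a JCas with begin/end character offsets (either via the constructor or via setBegin()/setEnd()) and make it visible to index-based queries by calling addToIndexes(). Here is a minimal, self-contained sketch of that pattern, assuming uimaFIT and the DKPro Core segmentation types are on the classpath; the class name, text, and offsets are purely illustrative:

import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.jcas.JCas;

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;

public class AddSentenceToIndexesSketch {
    public static void main(String[] args) throws Exception {
        // Create a JCas holding some document text; "en" is the document language
        JCas jcas = JCasFactory.createText("Hello world. Second sentence.", "en");

        // Construct a Sentence over the first sentence's character span...
        Sentence first = new Sentence(jcas, 0, 12);
        // ...and register it in the CAS indexes. Without this call the annotation
        // exists but is not returned by index-based iteration (e.g. JCasUtil.select).
        first.addToIndexes();

        Sentence second = new Sentence(jcas, 13, 29);
        second.addToIndexes();

        System.out.println("'" + first.getCoveredText() + "'");   // 'Hello world.'
        System.out.println("'" + second.getCoveredText() + "'");  // 'Second sentence.'
    }
}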
Example 1
Source File: StanfordTokenizer.java    From ambiverse-nlu with Apache License 2.0
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  String text = aJCas.getDocumentText();
  Annotation document = new Annotation(text);
  StanfordCoreNLP stanfordCoreNLP;

  // Fail fast if no CoreNLP pipeline is configured for the document language
  if (!languageMap.containsKey(aJCas.getDocumentLanguage())) {
    throw new AnalysisEngineProcessException(new LanguageNotSupportedException("Language Not Supported"));
  }

  stanfordCoreNLP = stanfordCoreNLPs[languageMap.get(aJCas.getDocumentLanguage())];

  stanfordCoreNLP.annotate(document);
  List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    // Copy each CoreNLP sentence span into a DKPro Sentence and register it in the CAS indexes
    int sstart = sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    int ssend = sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    Sentence jsentence = new Sentence(aJCas, sstart, ssend);
    jsentence.addToIndexes();

    // Likewise, map each CoreNLP token to a DKPro Token and index it
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      Token casToken = new Token(aJCas, token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class), token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
      casToken.addToIndexes();
    }
  }
}
 
Example 2
Source File: Tcf2DKPro.java    From inception with Apache License 2.0
public void convertSentences(JCas aJCas, TextCorpus aCorpusData,
        Map<String, Token> aTokens)
{
    if (aCorpusData.getSentencesLayer() == null) {
        // No layer to read from.
        return;
    }

    for (int i = 0; i < aCorpusData.getSentencesLayer().size(); i++) {
        eu.clarin.weblicht.wlfxb.tc.api.Token[] sentencesTokens = aCorpusData
                .getSentencesLayer().getTokens(aCorpusData.getSentencesLayer().getSentence(i));

        // The sentence span is derived from its first and last token, then indexed
        Sentence outSentence = new Sentence(aJCas);

        outSentence.setBegin(aTokens.get(sentencesTokens[0].getID()).getBegin());
        outSentence.setEnd(aTokens.get(sentencesTokens[sentencesTokens.length - 1].getID())
                .getEnd());
        outSentence.addToIndexes();
    }
}
 
Example 3
Source File: WebannoTsv3Reader.java    From webanno with Apache License 2.0
private void createSentence(JCas aJCas, String aLine, int aBegin, int aEnd, int aPrevEnd)
{
    // If the next sentence immediately follows the last one without any space or line break
    // in between, then we need to chop off again the linebreak that we added at the end of the
    // last sentence - otherwise offsets will be off on a round-trip.
    if (aPrevEnd == aBegin && coveredText.length() > 0
            && (coveredText.charAt(coveredText.length() - 1) == '\n')) {
        coveredText.deleteCharAt(coveredText.length() - 1);
    }

    if (aPrevEnd + 1 < aBegin) {
        // FIXME This is very slow. Better use StringUtils.repeat()
        StringBuilder pad = new StringBuilder(); // if there is plenty of spaces between
                                                 // sentences
        for (int i = aPrevEnd + 1; i < aBegin; i++) {
            pad.append(" ");
        }
        coveredText.append(pad).append(aLine).append(LF);
    }
    else {
        coveredText.append(aLine).append(LF);
    }
    Sentence sentence = new Sentence(aJCas, aBegin, aEnd);
    sentence.addToIndexes();
}
 
Example 4
Source File: StringMatchingRecommenderTest.java    From inception with Apache License 2.0
private List<CAS> getTestNECas(String aText, String[] aVals, int[][] aNEIndices,
        int[][] aSentIndices, int[][] aTokenIndices)
    throws Exception
{
    JCas jcas = JCasFactory.createText(aText, "de");

    // Sentences, tokens, and named entities all follow the same pattern:
    // construct the annotation with offsets, then call addToIndexes()
    for (int j = 0; j < aSentIndices.length; j++) {
        Sentence newSent = new Sentence(jcas, aSentIndices[j][0], aSentIndices[j][1]);
        newSent.addToIndexes();
    }

    for (int k = 0; k < aTokenIndices.length; k++) {
        Token newToken = new Token(jcas, aTokenIndices[k][0], aTokenIndices[k][1]);
        newToken.addToIndexes();
    }

    for (int i = 0; i < aVals.length; i++) {
        NamedEntity newNE = new NamedEntity(jcas, aNEIndices[i][0], aNEIndices[i][1]);
        newNE.setValue(aVals[i]);
        newNE.addToIndexes();
    }

    List<CAS> casses = new ArrayList<>();
    casses.add(jcas.getCas());

    return casses;
}
 
Example 5
Source File: AnnotationSpansTest.java    From argument-reasoning-comprehension-task with Apache License 2.0
@Before
public void setUp()
        throws Exception
{
    jCas = JCasFactory.createJCas();
    jCas.setDocumentText("s0t0 s0t2 s1t0 s2t0 s2t0 s3t0");
    jCas.setDocumentLanguage("en");

    Sentence s0 = new Sentence(jCas, 0, 9);
    s0.addToIndexes();

    Sentence s1 = new Sentence(jCas, 10, 14);
    s1.addToIndexes();

    Sentence s2 = new Sentence(jCas, 15, 24);
    s2.addToIndexes();

    Sentence s3 = new Sentence(jCas, 25, 29);
    s3.addToIndexes();

    Premise p1 = new Premise(jCas, 0, 14);
    p1.addToIndexes();

    Premise p2 = new Premise(jCas, 25, 29);
    p2.addToIndexes();

    System.out.println("'" + s0.getCoveredText() + "'");
    System.out.println("'" + s1.getCoveredText() + "'");
    System.out.println("'" + s2.getCoveredText() + "'");
    System.out.println("'" + s3.getCoveredText() + "'");
    System.out.println("p1: '" + p1.getCoveredText() + "'");
    System.out.println("p2: '" + p2.getCoveredText() + "'");
}
 
Example 6
Source File: WebannoTsv1Reader.java    From webanno with Apache License 2.0
/**
 * Add sentence layer to CAS
 */
private void createSentence(JCas aJCas, List<Integer> firstTokenInSentence,
        Map<String, Token> tokensStored)
{
    for (int i = 0; i < firstTokenInSentence.size(); i++) {
        Sentence outSentence = new Sentence(aJCas);
        // Only the last sentence, and not the only sentence in the document (i != 0)
        if (i == firstTokenInSentence.size() - 1 && i != 0) {
            outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd());
            outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd());
            outSentence.addToIndexes();
            break;
        }
        if (i == firstTokenInSentence.size() - 1 && i == 0) {
            outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i))
                    .getBegin());
            outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd());
            outSentence.addToIndexes();
        }
        else if (i == 0) {
            outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i))
                    .getBegin());
            outSentence.setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1))
                    .getEnd());
            outSentence.addToIndexes();
        }
        else {
            outSentence.setBegin(
                    tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd() + 1);
            outSentence
                    .setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1)).getEnd());
            outSentence.addToIndexes();
        }
    }
}
 
Example 7
Source File: LineOrientedTextReader.java    From webanno with Apache License 2.0
protected Sentence createSentence(final JCas aJCas, final int aBegin,
        final int aEnd)
{
    // Trim leading/trailing whitespace from the requested span before creating the sentence
    int[] span = new int[] { aBegin, aEnd };
    trim(aJCas.getDocumentText(), span);
    if (!isEmpty(span[0], span[1])) {
        Sentence seg = new Sentence(aJCas, span[0], span[1]);
        // addToIndexes(JCas) indexes the annotation in the given JCas view; since the
        // annotation was created on aJCas, this is equivalent to the no-argument form
        seg.addToIndexes(aJCas);
        return seg;
    }
    else {
        return null;
    }
}