opennlp.tools.tokenize.SimpleTokenizer Java Examples

The following examples show how to use opennlp.tools.tokenize.SimpleTokenizer, drawn from a number of open-source projects. The source file, project, and license are noted above each example.
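SimpleTokenizer splits text wherever the character class changes (letters, digits, whitespace, and other characters such as punctuation), so unlike TokenizerME it needs no model file. Before the project examples, here is a minimal, self-contained sketch of the two core calls; the class name and sample sentence are illustrative and not taken from any project below:

import java.util.Arrays;

import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.util.Span;

public class SimpleTokenizerSketch {
    public static void main(String[] args) {
        // SimpleTokenizer is a stateless singleton, accessed through INSTANCE
        SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
        String text = "Mr. Smith paid $5.00!";

        // tokenize returns the token strings; boundaries fall at character-class changes
        String[] tokens = tokenizer.tokenize(text);
        System.out.println(Arrays.toString(tokens));
        // expected: [Mr, ., Smith, paid, $, 5, ., 00, !]

        // tokenizePos returns character offsets into the original string, which is
        // what lets Span results (e.g. from NameFinderME) map back to the raw text
        Span[] spans = tokenizer.tokenizePos(text);
        System.out.println(Arrays.toString(Span.spansToStrings(spans, text)));
    }
}

Many of the examples below rely on exactly this pairing: tokenize feeds NameFinderME, POSTaggerME, and the document categorizer, while tokenizePos recovers character offsets (see Examples #19 and #25).
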
Example #1
Source File: Chapter1.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License
private static void nameFinderExample() {
    try {
        String[] sentences = {
            "Tim was a good neighbor. Perhaps not as good a Bob "
            + "Haywood, but still pretty good. Of course Mr. Adam "
            + "took the cake!"};
        Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
        TokenNameFinderModel model = new TokenNameFinderModel(new File(
                "C:\\OpenNLP Models", "en-ner-person.bin"));
        NameFinderME finder = new NameFinderME(model);

        for (String sentence : sentences) {
            // Split the sentence into tokens
            String[] tokens = tokenizer.tokenize(sentence);

            // Find the names in the tokens and return Span objects
            Span[] nameSpans = finder.find(tokens);

            // Print the names extracted from the tokens using the Span data
            System.out.println(Arrays.toString(
                    Span.spansToStrings(nameSpans, tokens)));
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
 
Example #2
Source File: POSStructureScorer.java    From uncc2014watsonsim with GNU General Public License v2.0
public static Parse[] parsePassageText(String p) throws InvalidFormatException {
	// Initialize the sentence detector and the chart parser
	SentenceDetectorME sentenceDetector = new SentenceDetectorME(sentenceModel);
	Parser parser = ParserFactory.create(
			parserModel,
			20,    // beam size
			0.95); // advance percentage

	// Split the passage into sentences, tokenize each, and keep the top parse
	String[] sentences = sentenceDetector.sentDetect(p);
	Parse[] results = new Parse[sentences.length];
	for (int i = 0; i < sentences.length; i++) {
		String[] tks = SimpleTokenizer.INSTANCE.tokenize(sentences[i]);
		String sent = StringUtils.join(tks, " ");
		System.out.println("Found sentence " + sent);
		Parse[] sentResults = ParserTool.parseLine(sent, parser, 1);
		results[i] = sentResults[0];
	}
	return results;
}
 
Example #3
Source File: OpenNlpTests.java    From uncc2014watsonsim with GNU General Public License v2.0
public Parse[] parsePassageText(String p) throws InvalidFormatException {
	if (!modelsAreInitialized) init();
	// Initialize the sentence detector and the chart parser
	SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
	Parser parser = ParserFactory.create(
			this.parserModel,
			20,    // beam size
			0.95); // advance percentage

	// Find sentences, tokenize each, parse each, and return the top parse for each
	String[] sentences = sentenceDetector.sentDetect(p);
	Parse[] results = new Parse[sentences.length];
	for (int i = 0; i < sentences.length; i++) {
		// There are several tokenizers available; SimpleTokenizer works best here
		String[] tks = SimpleTokenizer.INSTANCE.tokenize(sentences[i]);
		String sent = StringUtils.join(tks, " ");
		System.out.println("Found sentence " + sent);
		Parse[] sentResults = ParserTool.parseLine(sent, parser, 1);
		results[i] = sentResults[0];
	}
	return results;
}
 
Example #4
Source File: DocumentType.java    From baleen with Apache License 2.0
@Override
public void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  DocumentAnnotation da = getDocumentAnnotation(jCas);

  if (Strings.isNullOrEmpty(da.getDocType())) {
    // Tokenize the document text and run the document categorizer over it
    double[] outcomes =
        doccat.categorize(SimpleTokenizer.INSTANCE.tokenize(jCas.getDocumentText()));
    String cat = doccat.getBestCategory(outcomes);

    // Find the highest outcome score, i.e. the confidence of the best category
    double max = -Double.MAX_VALUE;
    for (double d : outcomes) {
      if (d > max) {
        max = d;
      }
    }

    // Only set the document type when the confidence clears the threshold
    if (threshold != null && max > threshold) {
      da.setDocType(cat);
    }

  } else {
    getMonitor()
        .warn("A DocType annotation already exists. A second annotation will not be added.");
  }
}
 
Example #5
Source File: TestNER.java    From Mutters with Apache License 2.0
@Test
public void testAddressNER() throws Exception
{
  URL modelUrl = Thread.currentThread().getContextClassLoader().getResource("models/en-ner-address.bin");
  assertThat(modelUrl, is(notNullValue()));

  TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
  assertThat(model, is(notNullValue()));

  NameFinderME nameFinder = new NameFinderME(model);
  String[] tokens = SimpleTokenizer.INSTANCE.tokenize("Send a taxi to 12 Pleasent Street");
  Span[] spans = nameFinder.find(tokens);
  assertThat(spans.length, is(1));

  String[] locations = Span.spansToStrings(spans, tokens);
  assertThat(locations.length, is(1));
  assertThat(locations[0], is("12 Pleasent Street"));
}
 
Example #6
Source File: TestNER.java    From Mutters with Apache License 2.0
@Test
public void testDateNER() throws Exception
{
  URL modelUrl = Thread.currentThread().getContextClassLoader().getResource("models/en-ner-dates.bin");
  assertThat(modelUrl, is(notNullValue()));

  TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
  assertThat(model, is(notNullValue()));

  NameFinderME nameFinder = new NameFinderME(model);
  String[] tokens = SimpleTokenizer.INSTANCE
      .tokenize("Mr. John Smith of New York, married Anne Green of London today.");
  assertThat(tokens.length, is(15));

  Span[] spans = nameFinder.find(tokens);
  assertThat(spans.length, is(1));

  String[] locations = Span.spansToStrings(spans, tokens);
  assertThat(locations.length, is(1));
  assertThat(locations[0], is("today"));
}
 
Example #7
Source File: TestNER.java    From Mutters with Apache License 2.0
@Test
public void testLocationNER() throws Exception
{
  URL modelUrl = Thread.currentThread().getContextClassLoader().getResource("models/en-ner-locations.bin");
  assertThat(modelUrl, is(notNullValue()));

  TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
  assertThat(model, is(notNullValue()));

  NameFinderME nameFinder = new NameFinderME(model);
  String[] tokens = SimpleTokenizer.INSTANCE
      .tokenize("Mr. John Smith of New York, married Anne Green of London today.");
  assertThat(tokens.length, is(15));

  Span[] spans = nameFinder.find(tokens);
  assertThat(spans.length, is(2));

  String[] locations = Span.spansToStrings(spans, tokens);
  assertThat(locations.length, is(2));
  assertThat(locations[0], is("New York"));
  assertThat(locations[1], is("London"));
}
 
Example #8
Source File: TestNER.java    From Mutters with Apache License 2.0
@Test
public void testPersonNER() throws Exception
{
  URL modelUrl = Thread.currentThread().getContextClassLoader().getResource("models/en-ner-persons.bin");
  assertThat(modelUrl, is(notNullValue()));

  TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
  assertThat(model, is(notNullValue()));

  NameFinderME nameFinder = new NameFinderME(model);
  String[] tokens = SimpleTokenizer.INSTANCE
      .tokenize("Mr. John Smith of New York, married Anne Green of London today.");
  assertThat(tokens.length, is(15));

  Span[] spans = nameFinder.find(tokens);
  assertThat(spans.length, is(2));

  String[] names = Span.spansToStrings(spans, tokens);
  assertThat(names.length, is(2));
  assertThat(names[0], is("John Smith"));
  assertThat(names[1], is("Anne Green"));
}
 
Example #9
Source File: OpenNlpService.java    From elasticsearch-ingest-opennlp with Apache License 2.0
public ExtractedEntities find(String content, String field) {
    try {
        if (!nameFinderModels.containsKey(field)) {
            throw new ElasticsearchException("Could not find field [{}], possible values {}", field, nameFinderModels.keySet());
        }
        TokenNameFinderModel finderModel = nameFinderModels.get(field);
        if (threadLocal.get() == null || !threadLocal.get().equals(finderModel)) {
            threadLocal.set(finderModel);
        }

        String[] tokens = SimpleTokenizer.INSTANCE.tokenize(content);
        Span[] spans = new NameFinderME(finderModel).find(tokens);

        return new ExtractedEntities(tokens, spans);
    } finally {
        threadLocal.remove();
    }
}
 
Example #10
Source File: OpenNlpTartarus.java    From scava with Eclipse Public License 2.0
public OpenNlpTartarus() {
	logger = (OssmeterLogger) OssmeterLogger.getLogger("uk.ac.nactem.posstemmer");

	ClassLoader cl = getClass().getClassLoader();
	try {
		posTaggerME = loadPoSME(cl, "models/en-pos-maxent.bin");
		simpleTokenizer = SimpleTokenizer.INSTANCE;
		SentenceModel sentenceModel = loadSentenceModel(cl, "models/en-sent.bin");
		sentenceDetector = new SentenceDetectorME(sentenceModel);
		logger.info("Models have been successfully loaded");
	} catch (IOException e) {
		logger.error("Error while loading the model:", e);
		e.printStackTrace();
	}

	stemmer = new englishStemmer();
}
 
Example #11
Source File: LemmetizerUnitTest.java    From tutorials with MIT License
@Test
public void givenEnglishDictionary_whenLemmatize_thenLemmasAreDetected() throws Exception {

    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("John has a sister named Penny.");

    InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
    POSModel posModel = new POSModel(inputStreamPOSTagger);
    POSTaggerME posTagger = new POSTaggerME(posModel);
    String[] tags = posTagger.tag(tokens);
    InputStream dictLemmatizer = getClass().getResourceAsStream("/models/en-lemmatizer.dict");
    DictionaryLemmatizer lemmatizer = new DictionaryLemmatizer(dictLemmatizer);
    String[] lemmas = lemmatizer.lemmatize(tokens, tags);

    assertThat(lemmas).contains("O", "have", "a", "sister", "name", "O", "O");
}
 
Example #12
Source File: NamedEntityRecognitionUnitTest.java    From tutorials with MIT License
@Test
public void givenEnglishPersonModel_whenNER_thenPersonsAreDetected() throws Exception {
    
    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("John is 26 years old. His best friend's name is Leonard. He has a sister named Penny.");
    
    InputStream inputStreamNameFinder = getClass().getResourceAsStream("/models/en-ner-person.bin");
    TokenNameFinderModel model = new TokenNameFinderModel(inputStreamNameFinder);
    NameFinderME nameFinderME = new NameFinderME(model);
    List<Span> spans = Arrays.asList(nameFinderME.find(tokens));
    assertThat(spans.toString()).isEqualTo("[[0..1) person, [13..14) person, [20..21) person]");
    List<String> names = new ArrayList<String>();
    int k = 0;
    for (Span s : spans) {
        names.add("");
        for (int index = s.getStart(); index < s.getEnd(); index++) {
            names.set(k, names.get(k) + tokens[index]);
        }
        k++;
    }
    assertThat(names).contains("John","Leonard","Penny");
}
 
Example #13
Source File: ChunkerUnitTest.java    From tutorials with MIT License
@Test
public void givenChunkerModel_whenChunk_thenChunksAreDetected() throws Exception {

    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("He reckons the current account deficit will narrow to only 8 billion.");

    InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
    POSModel posModel = new POSModel(inputStreamPOSTagger);
    POSTaggerME posTagger = new POSTaggerME(posModel);
    String[] tags = posTagger.tag(tokens);

    InputStream inputStreamChunker = new FileInputStream("src/main/resources/models/en-chunker.bin");
    ChunkerModel chunkerModel = new ChunkerModel(inputStreamChunker);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    String[] chunks = chunker.chunk(tokens, tags);
    assertThat(chunks).contains("B-NP", "B-VP", "B-NP", "I-NP", "I-NP", "I-NP", "B-VP", "I-VP", "B-PP", "B-NP", "I-NP", "I-NP", "O");
}
 
Example #14
Source File: OpenNlpTests.java    From uncc2014watsonsim with GNU General Public License v2.0
public void taggerTest(){
	String[] words = SimpleTokenizer.INSTANCE.tokenize(
			"The quick, red fox jumped over the lazy, brown dogs.");
	String[] result = tagger.tag(words);
	for (int i = 0; i < words.length; i++) {
		System.err.print(words[i] + "/" + result[i] + " ");
	}
	System.err.println("\n");
}
 
Example #15
Source File: POSTaggerUnitTest.java    From tutorials with MIT License
@Test
public void givenPOSModel_whenPOSTagging_thenPOSAreDetected() throws Exception {

    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("John has a sister named Penny.");

    InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
    POSModel posModel = new POSModel(inputStreamPOSTagger);
    POSTaggerME posTagger = new POSTaggerME(posModel);
    String[] tags = posTagger.tag(tokens);
    assertThat(tags).contains("NNP", "VBZ", "DT", "NN", "VBN", "NNP", ".");
}
 
Example #16
Source File: POSStructureScorer.java    From uncc2014watsonsim with GNU General Public License v2.0
private static int[] POSScoreSentece(String sampleQACombined) {
	// Count selected POS categories in the combined Q/A text: cardinal numbers,
	// existential "there", adjectives, nouns, adverbs, and verbs
	int[] scorerModel = { 0, 0, 0, 0, 0, 0 };
	String[] words = SimpleTokenizer.INSTANCE.tokenize(sampleQACombined);
	String[] result = tagger.tag(words);
	for (int i = 0; i < result.length; i++) {
		System.out.println(result[i]);
	}
	for (int i = 0; i < words.length; i++) {
		if (result[i].equals("CD")) {
			scorerModel[0]++;
		} else if (result[i].equals("EX")) {
			scorerModel[1]++;
		} else if (result[i].equals("JJ") || result[i].equals("JJR") || result[i].equals("JJS")) {
			scorerModel[2]++;
		} else if (result[i].equals("NN") || result[i].equals("NNS") || result[i].equals("NNP") || result[i].equals("NNPS")) {
			scorerModel[3]++;
		} else if (result[i].equals("RB") || result[i].equals("RBR") || result[i].equals("RBS")) {
			scorerModel[4]++;
		} else if (result[i].equals("VB") || result[i].equals("VBD") || result[i].equals("VBG") || result[i].equals("VBN") || result[i].equals("VBP") || result[i].equals("VBZ")) {
			scorerModel[5]++;
		}
	}
	return scorerModel;
}
 
Example #17
Source File: Chapter2.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License
private static void usingTheSimpleTokenizerClass() {
    System.out.println("--- SimpleTokenizer");
    SimpleTokenizer simpleTokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = simpleTokenizer.tokenize(paragraph);
    for (String token : tokens) {
        System.out.println(token);
    }
}
 
Example #18
Source File: NERScorer.java    From uncc2014watsonsim with GNU General Public License v2.0
public void taggerTest(){
	String[] words = SimpleTokenizer.INSTANCE.tokenize(
			"The quick, red fox jumped over the lazy, brown dogs.");
	String[] result = tagger.tag(words);
	for (int i = 0; i < words.length; i++) {
		System.err.print(words[i] + "/" + result[i] + " ");
	}
	System.err.println("\n");
}
 
Example #19
Source File: NERScorer.java    From uncc2014watsonsim with GNU General Public License v2.0
public Parse[] parsePassageText(String p) throws InvalidFormatException {
	if (!modelsAreInitialized) init();
	// Initialize the sentence detector, name finder, and chart parser
	SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
	NameFinderME nameFinder = new NameFinderME(this.nerModel);
	Parser parser = ParserFactory.create(
			this.parserModel,
			20,    // beam size
			0.95); // advance percentage

	// Find sentences, tokenize each, parse each, and return the top parse for each
	String[] sentences = sentenceDetector.sentDetect(p);
	Parse[] results = new Parse[sentences.length];
	Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
	for (int i = 0; i < sentences.length; i++) {
		// There are several tokenizers available; SimpleTokenizer works best here.
		// tokenizePos keeps character offsets so names can be mapped back to the text.
		Span[] tokenSpans = tokenizer.tokenizePos(sentences[i]);
		String[] tokens = Span.spansToStrings(tokenSpans, sentences[i]);

		// Print any named entities found in this sentence
		Span[] names = nameFinder.find(tokens);
		for (int ni = 0; ni < names.length; ni++) {
			Span startSpan = tokenSpans[names[ni].getStart()];
			int nameStart = startSpan.getStart();
			Span endSpan = tokenSpans[names[ni].getEnd() - 1];
			int nameEnd = endSpan.getEnd();
			String name = sentences[i].substring(nameStart, nameEnd);
			System.out.println(name);
		}

		// Rejoin the tokens and keep the top parse for this sentence
		String sent = StringUtils.join(tokens, " ");
		System.out.println("Found sentence " + sent);
		Parse[] sentResults = ParserTool.parseLine(sent, parser, 1);
		results[i] = sentResults[0];
	}
	return results;
}
 
Example #20
Source File: SentenceSimilarity.java    From uncc2014watsonsim with GNU General Public License v2.0
/** Tokenize a paragraph into sentences, then into words. */
public List<List<String>> tokenizeParagraph(String paragraph) {
	List<List<String>> results = new ArrayList<>();
	// Find sentences, tokenize each, parse each, return top parse for each
	for (String unsplit_sentence : sentenceDetector.sentDetect(paragraph)) {
		results.add(Arrays.asList(
				SimpleTokenizer.INSTANCE.tokenize(unsplit_sentence)
				));
	}
	return results;
}
 
Example #21
Source File: SlotMatcherTests.java    From Mutters with Apache License 2.0
@Test
public void when_default_slot_does_not_match_return_default_value()
{
  OpenNLPSlotMatcher slotMatcher = new OpenNLPSlotMatcher(new OpenNLPTokenizer(SimpleTokenizer.INSTANCE));
  slotMatcher.addSlotModel("testSlot", "models/en-ner-persons.bin");
  Intent testIntent = new Intent("testIntent");
  TestSlot testSlot = new TestSlot("testSlot");
  testIntent.addSlot(testSlot);
  Map<Slot<?>, SlotMatch<?>> slotSlotMatchHashMap = slotMatcher.match(new Context(), testIntent, "testUtterance");

  SlotMatch<?> slotMatch = slotSlotMatchHashMap.get(testSlot);
  assertThat(slotMatch.getValue(), is("Default value"));
}
 
Example #22
Source File: NameFilter.java    From wiseowl with MIT License
public NameFilter(TokenStream in, String[] modelNames, NameFinderME[] finders) {
  super(in);
  this.tokenizer = SimpleTokenizer.INSTANCE;
  this.finders = finders;
  this.tokenTypeNames = new String[modelNames.length];
  for (int i=0; i < modelNames.length; i++) {
    tokenTypeNames[i] = NE_PREFIX + modelNames[i];
  }
}
 
Example #23
Source File: Chapter7.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License
private static List<President> createPresidentList() {
    ArrayList<President> list = new ArrayList<>();
    String line = null;
    try (FileReader reader = new FileReader("PresidentList");
            BufferedReader br = new BufferedReader(reader)) {
        while ((line = br.readLine()) != null) {
            SimpleTokenizer simpleTokenizer = SimpleTokenizer.INSTANCE;
            String[] tokens = simpleTokenizer.tokenize(line);
            String name = "";
            String start = "";
            String end = "";
            int i = 0;
            while (!"(".equals(tokens[i])) {
                name += tokens[i] + " ";
                i++;
            }
            start = tokens[i + 1];
            end = tokens[i + 3];
            if (end.equalsIgnoreCase("present")) {
                end = start;
            }
            list.add(new President(name, Integer.parseInt(start),
                    Integer.parseInt(end)));
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
    return list;
}
 
Example #24
Source File: Chapter2.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License
private static void usingStopWordsClassExample() {
    StopWords stopWords = new StopWords("stopwords.txt");
    SimpleTokenizer simpleTokenizer = SimpleTokenizer.INSTANCE;
    paragraph = "A simple approach is to create a class "
            + "to hold and remove stopwords.";
    String[] tokens = simpleTokenizer.tokenize(paragraph);
    String[] list = stopWords.removeStopWords(tokens);
    for (String word : list) {
        System.out.println(word);
    }
    stopWords.displayStopWords();
}
 
Example #25
Source File: OpenNlpService.java    From elasticsearch-ingest-opennlp with Apache License 2.0
static String createAnnotatedText(String content, List<ExtractedEntities> extractedEntities) {
    // these spans contain the real offset of each word in start/end variables!
    // the spans of the method argument contain the offset of each token, as mentioned in tokens!
    Span[] spansWithRealOffsets = SimpleTokenizer.INSTANCE.tokenizePos(content);

    List<Span> spansList = new ArrayList<>();
    extractedEntities.stream()
            .map(ExtractedEntities::getSpans)
            .forEach(s -> spansList.addAll(Arrays.asList(s)));

    Span[] spans = NameFinderME.dropOverlappingSpans(spansList.toArray(new Span[0]));
    String[] tokens = extractedEntities.get(0).getTokens();

    // shortcut if there is no enrichment to be done
    if (spans.length == 0) {
        return content;
    }

    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < tokens.length; i++) {
        final int idx = i;
        String token = tokens[i];

        final Optional<Span> optionalSpan = Arrays.stream(spans).filter(s -> s.getStart() == idx).findFirst();
        if (optionalSpan.isPresent()) {
            Span span = optionalSpan.get();
            int start = span.getStart();
            int end = span.getEnd();
            String type = span.getType();

            String[] spanTokens = new String[end - start];
            int spanPosition = 0;
            for (int tokenPosition = start; tokenPosition < end; tokenPosition++) {
                spanTokens[spanPosition++] = tokens[tokenPosition];
            }
            String entityString = Strings.arrayToDelimitedString(spanTokens, " ");

            builder.append("[");
            builder.append(entityString);
            builder.append("](");
            builder.append(Strings.capitalize(type));
            builder.append("_");
            builder.append(entityString);
            builder.append(")");
            i = end - 1;
        } else {
            builder.append(token);
        }

        // only append a whitespace, if the offsets actually differ
        if (i < tokens.length - 1) {
            if (spansWithRealOffsets[i].getEnd() != spansWithRealOffsets[i+1].getStart()) {
                builder.append(" ");
            }
        }
    }

    return builder.toString();
}
 
Example #26
Source File: TokenizerUnitTest.java    From tutorials with MIT License
@Test
public void givenSimpleTokenizer_whenTokenize_thenTokensAreDetected() throws Exception {
    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("Baeldung is a Spring Resource.");
    assertThat(tokens).contains("Baeldung", "is", "a", "Spring", "Resource", ".");
}