org.deeplearning4j.text.sentenceiterator.CollectionSentenceIterator Java Examples

The following examples show how to use org.deeplearning4j.text.sentenceiterator.CollectionSentenceIterator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TfidfVectorizerTest.java (from deeplearning4j, Apache License 2.0; 6 votes)
/**
 * Builds a {@code TfidfVectorizer} over a small in-memory sentence collection with
 * parallel tokenization disabled, then runs {@code buildVocab()} and {@code fit()}.
 *
 * <p>The test passes only if an {@code ND4JIllegalStateException} is thrown before the
 * 20-second timeout elapses.
 *
 * @throws Exception the expected {@code ND4JIllegalStateException}, or any other failure
 */
@Test(expected = ND4JIllegalStateException.class, timeout = 20000L)
public void testParallelFlag3() throws Exception {
    // Seven input sentences; the fourth one is deliberately empty.
    val sentences = new ArrayList<String>();
    for (String sentence : new String[] {
            "First string",
            "Second string",
            "Third string",
            "",
            "Fifth string",
            "Long long long string",
            "Sixth string"}) {
        sentences.add(sentence);
    }

    // NOTE(review): ExplodingTokenizerFactory(-1, 4) is presumably what triggers the
    // expected exception during tokenization - confirm against its definition.
    val tfidf = new TfidfVectorizer.Builder()
            .allowParallelTokenization(false)
            .setIterator(new CollectionSentenceIterator(sentences))
            .setTokenizerFactory(new ExplodingTokenizerFactory(-1, 4))
            .build();

    tfidf.buildVocab();

    log.info("Fitting vectorizer...");
    tfidf.fit();
}
 
Example #2
Source File: CollectionSentenceIteratorExample.java (from Java-Deep-Learning-Cookbook, MIT License; 5 votes)
/**
 * Demonstrates {@code CollectionSentenceIterator} over a fixed list of sentences.
 *
 * <p>The iterator is traversed twice: the first pass only counts the sentences and
 * prints the total; after a {@code reset()}, a preprocessor is attached through
 * {@code SentenceDataPreProcessor.setPreprocessor} and the second pass prints each
 * sentence returned by the iterator.
 *
 * @param args command-line arguments (unused)
 * @throws IOException declared by the example's signature
 */
public static void main(String[] args) throws IOException {
    List<String> corpus = Arrays.asList(
            "No ,  he says now .",
            "And what did he do ?",
            "The money 's there .",
            "That was less than a year ago .",
            "But he made only the first .",
            "There 's still time for them to do it .",
            "But he should nt have .",
            " They have to come down to the people .",
            "I do nt know where that is .",
            "No , I would nt .",
            "Who Will It Be ?",
            "And no , I was not the one ."
    );
    SentenceIterator sentenceIterator = new CollectionSentenceIterator(corpus);

    // First pass: consume every sentence just to count them.
    int total = 0;
    for (; sentenceIterator.hasNext(); total++) {
        sentenceIterator.nextSentence();
    }
    System.out.println("count = " + total);

    // Rewind, attach the preprocessor, and print what the iterator now yields.
    sentenceIterator.reset();
    SentenceDataPreProcessor.setPreprocessor(sentenceIterator);
    for (; sentenceIterator.hasNext(); ) {
        String sentence = sentenceIterator.nextSentence();
        System.out.println(sentence);
    }
}
 
Example #3
Source File: CollectionSentenceIteratorExample.java (from Java-Deep-Learning-Cookbook, MIT License; 5 votes)
/**
 * Demonstrates {@code CollectionSentenceIterator} over a fixed list of sentences.
 *
 * <p>The iterator is traversed twice: the first pass only counts the sentences and
 * prints the total; after a {@code reset()}, a preprocessor is attached through
 * {@code SentenceDataPreProcessor.setPreprocessor} and the second pass prints each
 * sentence returned by the iterator.
 *
 * @param args command-line arguments (unused)
 * @throws IOException declared by the example's signature
 */
public static void main(String[] args) throws IOException {
    List<String> corpus = Arrays.asList(
            "No ,  he says now .",
            "And what did he do ?",
            "The money 's there .",
            "That was less than a year ago .",
            "But he made only the first .",
            "There 's still time for them to do it .",
            "But he should nt have .",
            " They have to come down to the people .",
            "I do nt know where that is .",
            "No , I would nt .",
            "Who Will It Be ?",
            "And no , I was not the one ."
    );
    SentenceIterator sentenceIterator = new CollectionSentenceIterator(corpus);

    // First pass: consume every sentence just to count them.
    int total = 0;
    for (; sentenceIterator.hasNext(); total++) {
        sentenceIterator.nextSentence();
    }
    System.out.println("count = " + total);

    // Rewind, attach the preprocessor, and print what the iterator now yields.
    sentenceIterator.reset();
    SentenceDataPreProcessor.setPreprocessor(sentenceIterator);
    for (; sentenceIterator.hasNext(); ) {
        String sentence = sentenceIterator.nextSentence();
        System.out.println(sentence);
    }
}
 
Example #4
Source File: TfidfVectorizerTest.java (from deeplearning4j, Apache License 2.0; 5 votes)
/**
 * Builds a {@code TfidfVectorizer} over a small in-memory sentence collection with
 * parallel tokenization disabled, then runs {@code buildVocab()} and {@code fit()}.
 *
 * <p>The test passes only if an {@code ND4JIllegalStateException} is thrown before the
 * 20-second timeout elapses.
 *
 * @throws Exception the expected {@code ND4JIllegalStateException}, or any other failure
 */
@Test(expected = ND4JIllegalStateException.class, timeout = 20000L)
public void testParallelFlag2() throws Exception {
    // Five input sentences; the fourth one is deliberately empty.
    val sentences = new ArrayList<String>();
    for (String sentence : new String[] {
            "First string",
            "Second string",
            "Third string",
            "",
            "Fifth string"}) {
        sentences.add(sentence);
    }

    // NOTE(review): ExplodingTokenizerFactory(8, -1) is presumably what triggers the
    // expected exception during tokenization - confirm against its definition.
    val tfidf = new TfidfVectorizer.Builder()
            .allowParallelTokenization(false)
            .setIterator(new CollectionSentenceIterator(sentences))
            .setTokenizerFactory(new ExplodingTokenizerFactory(8, -1))
            .build();

    tfidf.buildVocab();

    log.info("Fitting vectorizer...");
    tfidf.fit();
}