org.deeplearning4j.text.sentenceiterator.UimaSentenceIterator Java Examples

The following examples show how to use org.deeplearning4j.text.sentenceiterator.UimaSentenceIterator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: UimaSentenceIteratorExample.java    From Java-Deep-Learning-Cookbook with MIT License 5 votes vote down vote up
public static void main(String[] args) throws Exception {
    SentenceIterator iterator = UimaSentenceIterator.createWithPath("files/");
    int count=0;
    while(iterator.hasNext()){
        iterator.nextSentence();
        count++;
    }
    System.out.println("count = "+count);
    iterator.reset();
    SentenceDataPreProcessor.setPreprocessor(iterator);
    while(iterator.hasNext()){
        System.out.println(iterator.nextSentence());
    }

}
 
Example #2
Source File: UimaSentenceIteratorExample.java    From Java-Deep-Learning-Cookbook with MIT License 5 votes vote down vote up
public static void main(String[] args) throws Exception {
    SentenceIterator iterator = UimaSentenceIterator.createWithPath("files/");
    int count=0;
    while(iterator.hasNext()){
        iterator.nextSentence();
        count++;
    }
    System.out.println("count = "+count);
    iterator.reset();
    SentenceDataPreProcessor.setPreprocessor(iterator);
    while(iterator.hasNext()){
        System.out.println(iterator.nextSentence());
    }

}
 
Example #3
Source File: Word2VecRawTextExample.java    From Java-Data-Science-Cookbook with MIT License 5 votes vote down vote up
public static void main(String[] args) throws Exception {

        // Gets Path to Text file
        String filePath = "c:/raw_sentences.txt";

        log.info("Load & Vectorize Sentences....");
        // Strip white space before and after for each line
        SentenceIterator iter = UimaSentenceIterator.createWithPath(filePath);
        // Split on white spaces in the line to get words
        TokenizerFactory t = new DefaultTokenizerFactory();
        t.setTokenPreProcessor(new CommonPreprocessor());

        InMemoryLookupCache cache = new InMemoryLookupCache();
        WeightLookupTable table = new InMemoryLookupTable.Builder()
                .vectorLength(100)
                .useAdaGrad(false)
                .cache(cache)
                .lr(0.025f).build();

        log.info("Building model....");
        Word2Vec vec = new Word2Vec.Builder()
                .minWordFrequency(5).iterations(1)
                .layerSize(100).lookupTable(table)
                .stopWords(new ArrayList<String>())
                .vocabCache(cache).seed(42)
                .windowSize(5).iterate(iter).tokenizerFactory(t).build();

        log.info("Fitting Word2Vec model....");
        vec.fit();

        log.info("Writing word vectors to text file....");
        // Write word
        WordVectorSerializer.writeWordVectors(vec, "word2vec.txt");

        log.info("Closest Words:");
        Collection<String> lst = vec.wordsNearest("man", 5); 
        System.out.println(lst);
        double cosSim = vec.similarity("cruise", "voyage");
        System.out.println(cosSim);
    }