Java Code Examples for org.deeplearning4j.models.word2vec.wordstore.VocabCache#numWords()

The following examples show how to use org.deeplearning4j.models.word2vec.wordstore.VocabCache#numWords() . You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage in the sidebar.
Example 1
Source File: TSNEVisualizationExample.java    From Java-Deep-Learning-Cookbook with MIT License 6 votes vote down vote up
public static void main(String[] args) throws IOException {
    // t-SNE needs double precision for stable gradients.
    Nd4j.setDataType(DataBuffer.Type.DOUBLE);

    // Load the word-vector model: lookup table (weights) plus vocabulary.
    File vectorFile = new File("words.txt");
    String csvPath = "tsne-standard-coords.csv";
    Pair<InMemoryLookupTable,VocabCache> loaded = WordVectorSerializer.loadTxt(vectorFile);
    VocabCache vocab = loaded.getSecond();
    INDArray embedding = loaded.getFirst().getSyn0();

    // Collect every vocabulary word in index order; these label the t-SNE output rows.
    List<String> labels = new ArrayList<>();
    for (int idx = 0; idx < vocab.numWords(); idx++) {
        labels.add(vocab.wordAtIndex(idx));
    }

    // Configure Barnes-Hut t-SNE and project the embedding down to 2-D coordinates.
    BarnesHutTsne tsne = new BarnesHutTsne.Builder()
            .setMaxIter(100)
            .theta(0.5)
            .normalize(false)
            .learningRate(500)
            .useAdaGrad(false)
            .build();

    tsne.fit(embedding);
    // Write "word,x,y" rows for plotting.
    tsne.saveAsFile(labels, csvPath);
}
 
Example 2
Source File: TSNEVisualizationExample.java    From Java-Deep-Learning-Cookbook with MIT License 6 votes vote down vote up
public static void main(String[] args) throws IOException {
    // Barnes-Hut t-SNE is numerically sensitive; run everything in doubles.
    Nd4j.setDataType(DataBuffer.Type.DOUBLE);

    final File modelFile = new File("words.txt");
    final String outputFile = "tsne-standard-coords.csv";

    // loadTxt returns the weight table and the vocabulary as a pair.
    Pair<InMemoryLookupTable,VocabCache> model = WordVectorSerializer.loadTxt(modelFile);
    INDArray weights = model.getFirst().getSyn0();
    VocabCache cache = model.getSecond();

    // Words in vocabulary-index order, matching the rows of the weight matrix.
    List<String> wordLabels = new ArrayList<>();
    int i = 0;
    while (i < cache.numWords()) {
        wordLabels.add(cache.wordAtIndex(i));
        i++;
    }

    // Build the dimensionality-reduction model.
    BarnesHutTsne reducer = new BarnesHutTsne.Builder()
            .setMaxIter(100)
            .theta(0.5)
            .normalize(false)
            .learningRate(500)
            .useAdaGrad(false)
            .build();

    reducer.fit(weights);
    reducer.saveAsFile(wordLabels, outputFile);
}
 
Example 3
Source File: WordVectorSerializer.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * This method saves the specified SequenceVectors model to the target OutputStream.
 * <p>
 * Output format: the first line is the encoded {@code VectorsConfiguration} JSON,
 * followed by one encoded {@code ElementPair} (element JSON + weight vector) per line.
 *
 * @param vectors SequenceVectors model
 * @param factory SequenceElementFactory implementation for your objects
 * @param stream  Target output stream (not closed implicitly; the wrapping writer is)
 * @param <T>     element type stored in the model
 * @throws IOException if writing to the stream fails
 */
public static <T extends SequenceElement> void writeSequenceVectors(@NonNull SequenceVectors<T> vectors,
                                                                    @NonNull SequenceElementFactory<T> factory, @NonNull OutputStream stream) throws IOException {
    WeightLookupTable<T> lookupTable = vectors.getLookupTable();
    VocabCache<T> vocabCache = vectors.getVocab();

    try (PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(stream, StandardCharsets.UTF_8)))) {

        // at first line we save VectorsConfiguration
        writer.write(vectors.getConfiguration().toEncodedJson());

        // now we have elements one by one
        for (int x = 0; x < vocabCache.numWords(); x++) {
            T element = vocabCache.elementAtIndex(x);
            String json = factory.serialize(element);
            // dup() detaches the vector from the lookup table before extracting values
            double[] vector = lookupTable.vector(element.getLabel()).dup().data().asDouble();
            ElementPair pair = new ElementPair(json, vector);
            writer.println(pair.toEncodedJson());
            writer.flush();
        }
    }
}
 
Example 4
Source File: WordVectorSerializer.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Saves the vocab cache content to the provided OutputStream.
 * Please note: only vocabulary content is written, so this is suitable mostly
 * for BagOfWords/TF-IDF vectorizers.
 *
 * Format: a header line "numWords totalDocs totalOccurrences", then one
 * VocabWord JSON object per line.
 *
 * @param vocabCache vocabulary to serialize
 * @param stream     target stream (the wrapping writer is closed on exit)
 * @throws IOException if writing fails
 */
public static void writeVocabCache(@NonNull VocabCache<VocabWord> vocabCache, @NonNull OutputStream stream)
        throws IOException {
    try (PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(stream, StandardCharsets.UTF_8)))) {
        // Header: general vocabulary statistics.
        writer.println("" + vocabCache.numWords() + " " + vocabCache.totalNumberOfDocs() + " " + vocabCache.totalWordOccurrences());

        // One JSON-encoded VocabWord per line, in index order.
        int wordCount = vocabCache.numWords();
        for (int index = 0; index < wordCount; index++) {
            writer.println(vocabCache.elementAtIndex(index).toJSON());
        }
    }
}
 
Example 5
Source File: TsneTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testSimple() throws Exception {
    // Sanity check: run once without workspaces on real data, once with
    // workspaces enabled on synthetic data.
    for (int test = 0; test <= 1; test++) {
        final boolean syntheticData = (test == 1);
        final WorkspaceMode wsm = (test == 0) ? WorkspaceMode.NONE : WorkspaceMode.ENABLED;
        log.info("Starting test: WSM={}, syntheticData={}", wsm, syntheticData);

        // STEP 1: Initialization
        final int iterations = 50;
        Nd4j.setDefaultDataTypes(DataType.FLOAT, DataType.FLOAT);
        // Holds the label (word) for each row of the weight matrix.
        List<String> cacheList = new ArrayList<>();

        // STEP 2: obtain the weight matrix, either random or loaded from disk.
        INDArray weights;
        if (!syntheticData) {
            log.info("Load & Vectorize data....");
            // Open the bundled word-vector file and pull out the unique word vectors.
            File wordFile = new ClassPathResource("deeplearning4j-tsne/words.txt").getFile();
            Pair<InMemoryLookupTable, VocabCache> vectors = WordVectorSerializer.loadTxt(wordFile);
            VocabCache cache = vectors.getSecond();
            weights = vectors.getFirst().getSyn0();

            // Separate the word strings into their own list, index-aligned with weights.
            for (int i = 0; i < cache.numWords(); i++) {
                cacheList.add(cache.wordAtIndex(i));
            }
        } else {
            weights = Nd4j.rand(250, 200);
        }

        // STEP 3: build a dual-tree t-SNE to use later.
        log.info("Build model....");
        BarnesHutTsne tsne = new BarnesHutTsne.Builder()
                .setMaxIter(iterations)
                .theta(0.5)
                .normalize(false)
                .learningRate(500)
                .useAdaGrad(false)
                .workspaceMode(wsm)
                .build();

        // STEP 4: compute the t-SNE coordinates and save them to a file.
        log.info("Store TSNE Coordinates for Plotting....");
        File outDir = testDir.newFolder();
        tsne.fit(weights);
        tsne.saveAsFile(cacheList, new File(outDir, "out.txt").getAbsolutePath());
    }
}
 
Example 6
Source File: TsneTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testPerformance() throws Exception {
    // Same pipeline as testSimple, but timed end-to-end across both runs.
    StopWatch watch = new StopWatch();
    watch.start();

    for (int test = 0; test <= 1; test++) {
        final boolean syntheticData = (test == 1);
        final WorkspaceMode wsm = (test == 0) ? WorkspaceMode.NONE : WorkspaceMode.ENABLED;
        log.info("Starting test: WSM={}, syntheticData={}", wsm, syntheticData);

        // STEP 1: Initialization
        final int iterations = 50;
        Nd4j.setDefaultDataTypes(DataType.FLOAT, DataType.FLOAT);
        // Labels for each weight-matrix row (empty for the synthetic run).
        List<String> cacheList = new ArrayList<>();

        // STEP 2: obtain the weight matrix, either random or loaded from disk.
        INDArray weights;
        if (!syntheticData) {
            log.info("Load & Vectorize data....");
            // Open the bundled word-vector file and pull out the unique word vectors.
            File wordFile = new ClassPathResource("deeplearning4j-tsne/words.txt").getFile();
            Pair<InMemoryLookupTable, VocabCache> vectors = WordVectorSerializer.loadTxt(wordFile);
            VocabCache cache = vectors.getSecond();
            weights = vectors.getFirst().getSyn0();

            // Separate the word strings into their own list, index-aligned with weights.
            for (int i = 0; i < cache.numWords(); i++) {
                cacheList.add(cache.wordAtIndex(i));
            }
        } else {
            weights = Nd4j.rand(DataType.FLOAT, 250, 20);
        }

        // STEP 3: build a dual-tree t-SNE to use later.
        log.info("Build model....");
        BarnesHutTsne tsne = new BarnesHutTsne.Builder()
                .setMaxIter(iterations)
                .theta(0.5)
                .normalize(false)
                .learningRate(500)
                .useAdaGrad(false)
                .workspaceMode(wsm)
                .build();

        // STEP 4: compute the t-SNE coordinates and save them to a file.
        log.info("Store TSNE Coordinates for Plotting....");
        File outDir = testDir.newFolder();
        tsne.fit(weights);
        tsne.saveAsFile(cacheList, new File(outDir, "out.txt").getAbsolutePath());
    }

    watch.stop();
    System.out.println("Elapsed time : " + watch);
}