Java Code Examples for org.deeplearning4j.models.embeddings.loader.WordVectorSerializer#loadTxt()

The following examples show how to use org.deeplearning4j.models.embeddings.loader.WordVectorSerializer#loadTxt(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TSNEVisualizationExample.java    From Java-Deep-Learning-Cookbook with MIT License 6 votes vote down vote up
public static void main(String[] args) throws IOException {
    // t-SNE is numerically sensitive; run the whole pipeline in double precision.
    Nd4j.setDataType(DataBuffer.Type.DOUBLE);

    File vectorFile = new File("words.txt");
    String csvPath = "tsne-standard-coords.csv";

    // Load pre-trained word vectors: the lookup table holds the embedding
    // matrix (syn0), the vocab cache maps row index -> word.
    Pair<InMemoryLookupTable,VocabCache> loaded = WordVectorSerializer.loadTxt(vectorFile);
    VocabCache vocab = loaded.getSecond();
    INDArray embeddings = loaded.getFirst().getSyn0();

    // Collect every word in index order so output rows can be labelled.
    List<String> labels = new ArrayList<>();
    int wordCount = vocab.numWords();
    for (int idx = 0; idx < wordCount; idx++) {
        labels.add(vocab.wordAtIndex(idx));
    }

    // Configure Barnes-Hut t-SNE, reduce the embeddings, and write the
    // resulting coordinates (one labelled row per word) to CSV.
    BarnesHutTsne tsne = new BarnesHutTsne.Builder()
            .setMaxIter(100)
            .theta(0.5)
            .normalize(false)
            .learningRate(500)
            .useAdaGrad(false)
            .build();
    tsne.fit(embeddings);
    tsne.saveAsFile(labels, csvPath);
}
 
Example 2
Source File: TSNEVisualizationExample.java    From Java-Deep-Learning-Cookbook with MIT License 6 votes vote down vote up
public static void main(String[] args) throws IOException {
    // Use double precision throughout; t-SNE gradients degrade in float32.
    Nd4j.setDataType(DataBuffer.Type.DOUBLE);

    File inputVectors = new File("words.txt");
    String outputCsv = "tsne-standard-coords.csv";

    // Deserialize the text-format word vectors into a (lookup table, vocab) pair.
    Pair<InMemoryLookupTable,VocabCache> pair = WordVectorSerializer.loadTxt(inputVectors);
    VocabCache vocabCache = pair.getSecond();
    INDArray wordMatrix = pair.getFirst().getSyn0();   // one row per word

    // Words in row order — these become the labels in the saved CSV.
    List<String> wordList = new ArrayList<>();
    for (int row = 0, n = vocabCache.numWords(); row < n; row++) {
        wordList.add(vocabCache.wordAtIndex(row));
    }

    // Build the Barnes-Hut approximated t-SNE model and project the
    // high-dimensional embeddings down for plotting.
    BarnesHutTsne model = new BarnesHutTsne.Builder()
            .setMaxIter(100)
            .theta(0.5)
            .normalize(false)
            .learningRate(500)
            .useAdaGrad(false)
            .build();
    model.fit(wordMatrix);
    model.saveAsFile(wordList, outputCsv);
}
 
Example 3
Source File: TsneTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testSimple() throws Exception {
    // Sanity check: exercise t-SNE twice — once without workspaces on real
    // word vectors, once with workspaces enabled on synthetic data.
    for (int run = 0; run <= 1; run++) {
        boolean useSynthetic = run == 1;
        WorkspaceMode mode = run == 0 ? WorkspaceMode.NONE : WorkspaceMode.ENABLED;
        log.info("Starting test: WSM={}, syntheticData={}", mode, useSynthetic);

        //STEP 1: Initialization — float precision is enough for a sanity run.
        int maxIterations = 50;
        Nd4j.setDefaultDataTypes(DataType.FLOAT, DataType.FLOAT);
        List<String> wordLabels = new ArrayList<>();   // row labels for the output file

        //STEP 2: Obtain the weight matrix, either random or from serialized vectors.
        INDArray embedding;
        if (useSynthetic) {
            embedding = Nd4j.rand(250, 200);
        } else {
            log.info("Load & Vectorize data....");
            File wordFile = new ClassPathResource("deeplearning4j-tsne/words.txt").getFile();
            Pair<InMemoryLookupTable, VocabCache> pair = WordVectorSerializer.loadTxt(wordFile);
            VocabCache vocab = pair.getSecond();
            embedding = pair.getFirst().getSyn0();     // separate the weight rows of unique words

            // Record each word in row order so labels line up with coordinates.
            for (int w = 0; w < vocab.numWords(); w++) {
                wordLabels.add(vocab.wordAtIndex(w));
            }
        }

        //STEP 3: Build the dual-tree (Barnes-Hut) t-SNE model.
        log.info("Build model....");
        BarnesHutTsne tsne = new BarnesHutTsne.Builder()
                .setMaxIter(maxIterations)
                .theta(0.5)
                .normalize(false)
                .learningRate(500)
                .useAdaGrad(false)
                .workspaceMode(mode)
                .build();

        //STEP 4: Fit and persist the 2-D coordinates to a temp folder.
        log.info("Store TSNE Coordinates for Plotting....");
        File outDir = testDir.newFolder();
        tsne.fit(embedding);
        tsne.saveAsFile(wordLabels, new File(outDir, "out.txt").getAbsolutePath());
    }
}
 
Example 4
Source File: TsneTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testPerformance() throws Exception {
    // Timed variant of the sanity test: runs the same two configurations
    // (no-workspace/real data, workspace/synthetic data) under a StopWatch.
    StopWatch watch = new StopWatch();
    watch.start();
    for( int test=0; test <=1; test++){
        boolean syntheticData = test == 1;
        WorkspaceMode wsm = test == 0 ? WorkspaceMode.NONE : WorkspaceMode.ENABLED;
        log.info("Starting test: WSM={}, syntheticData={}", wsm, syntheticData);

        //STEP 1: Initialization
        int iterations = 50;
        //create an n-dimensional array of floats (float precision suffices here)
        Nd4j.setDefaultDataTypes(DataType.FLOAT, DataType.FLOAT);
        List<String> cacheList = new ArrayList<>(); //cacheList is a dynamic array of strings used to hold all words

        //STEP 2: Turn text input into a list of words
        INDArray weights;
        if(syntheticData){
            weights = Nd4j.rand(DataType.FLOAT, 250, 20);
        } else {
            log.info("Load & Vectorize data....");
            File wordFile = new ClassPathResource("deeplearning4j-tsne/words.txt").getFile();   //Open the file
            //Get the data of all unique word vectors
            Pair<InMemoryLookupTable, VocabCache> vectors = WordVectorSerializer.loadTxt(wordFile);
            VocabCache cache = vectors.getSecond();
            weights = vectors.getFirst().getSyn0();    //separate weights of unique words into their own list

            for (int i = 0; i < cache.numWords(); i++)   //separate strings of words into their own list
                cacheList.add(cache.wordAtIndex(i));
        }

        //STEP 3: build a dual-tree tsne to use later
        log.info("Build model....");
        BarnesHutTsne tsne = new BarnesHutTsne.Builder()
                .setMaxIter(iterations)
                .theta(0.5)
                .normalize(false)
                .learningRate(500)
                .useAdaGrad(false)
                .workspaceMode(wsm)
                .build();


        //STEP 4: establish the tsne values and save them to a file
        log.info("Store TSNE Coordinates for Plotting....");
        File outDir = testDir.newFolder();
        tsne.fit(weights);
        tsne.saveAsFile(cacheList, new File(outDir, "out.txt").getAbsolutePath());
    }
    watch.stop();
    // Use the class logger (parameterized) instead of System.out for
    // consistency with the rest of this test's output.
    log.info("Elapsed time : {}", watch);
}