Java Code Examples for org.deeplearning4j.models.embeddings.loader.WordVectorSerializer#loadStaticModel()

The following examples show how to use org.deeplearning4j.models.embeddings.loader.WordVectorSerializer#loadStaticModel(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: WordVectorSerializerTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the binary model file twice, once through
 * {@code readWord2VecModel} and once through {@code loadStaticModel}, and
 * checks that both return the same vector for a single lookup word.
 *
 * @throws Exception if either loader fails
 */
@Test
@Ignore("AB 2019/06/24 - Failing: Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
public void testStaticLoaderBinary() throws Exception {
    final String executorName = Nd4j.getExecutioner().getClass().getSimpleName();
    logger.info("Executor name: {}", executorName);

    // Word looked up in both models
    final String probeWord = "Morgan_Freeman";

    final WordVectors liveModel = WordVectorSerializer.readWord2VecModel(binaryFile);
    final WordVectors staticModel = WordVectorSerializer.loadStaticModel(binaryFile);

    final INDArray expected = liveModel.getWordVectorMatrix(probeWord);
    final INDArray actual = staticModel.getWordVectorMatrix(probeWord);

    // The reference vector must exist before the comparison means anything
    assertNotEquals(null, expected);
    assertEquals(expected, actual);
}
 
Example 2
Source File: WordVectorSerializerTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the {@code word2vec.dl4j/file.w2v} classpath resource (a ZIP archive,
 * per the original note) through {@code readWord2Vec} and through
 * {@code loadStaticModel}, and checks that both return the same vector for a
 * single lookup word.
 *
 * @throws Exception if the resource cannot be resolved or either loader fails
 */
@Test
@Ignore("AB 2019/06/24 - Failing: Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
public void testStaticLoaderArchive() throws Exception {
    final String executorName = Nd4j.getExecutioner().getClass().getSimpleName();
    logger.info("Executor name: {}", executorName);

    final File archive = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();

    // Word looked up in both models
    final String probeWord = "night";

    final WordVectors liveModel = WordVectorSerializer.readWord2Vec(archive);
    final WordVectors staticModel = WordVectorSerializer.loadStaticModel(archive);

    final INDArray expected = liveModel.getWordVectorMatrix(probeWord);
    final INDArray actual = staticModel.getWordVectorMatrix(probeWord);

    // The reference vector must exist before the comparison means anything
    assertNotEquals(null, expected);
    assertEquals(expected, actual);
}
 
Example 3
Source File: AbstractTextEmbeddingIterator.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Loads the word vectors from {@code wordVectorLocation} unless they are
 * already present. The loader is chosen from the lower-cased file extension:
 * {@code .arff}, {@code .csv} and {@code .csv.gz} each have a dedicated
 * loader; any other file is handed to
 * {@code WordVectorSerializer.loadStaticModel}.
 *
 * @throws RuntimeException if the CSV loader reports failure
 */
public void initWordVectors() {
  if (wordVectors != null) {
    log.debug("Word vectors already loaded, skipping initialization.");
    return;
  }
  log.debug("Loading word vector model");

  final String absolutePath = wordVectorLocation.getAbsolutePath();
  final String normalizedPath = absolutePath.toLowerCase();

  if (normalizedPath.endsWith(".arff")) {
    // The ARFF loader receives the path in its original case
    loadEmbeddingFromArff(absolutePath);
    return;
  }

  if (normalizedPath.endsWith(".csv")) {
    final boolean loaded = loadEmbeddingFromCSV(wordVectorLocation);
    if (!loaded) {
      throw new RuntimeException("Could not load the word vector file.");
    }
    return;
  }

  if (normalizedPath.endsWith(".csv.gz")) {
    loadGZipped();
    return;
  }

  // No known extension matched: try loading the file as is
  wordVectors = WordVectorSerializer.loadStaticModel(wordVectorLocation);
}
 
Example 4
Source File: CnnTextFilesEmbeddingInstanceIteratorTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a test dataset via {@code TestUtil.makeTestDataset} and sets
 * attribute 0 of every instance to a sentence of ten words drawn from the
 * vocabulary of the model returned by
 * {@code DatasetLoader.loadGoogleNewsVectors()}. The random generator uses a
 * fixed seed, so the sentences are reproducible.
 *
 * @return the dataset with the generated sentences written to attribute 0
 * @throws Exception if the dataset or the word vectors cannot be created
 */
public Instances makeData() throws Exception {
  final Instances data = TestUtil.makeTestDataset(
      42, 100, 0, 0, 1, 0, 0, 1, Attribute.NUMERIC, 1, false);

  final WordVectors model =
      WordVectorSerializer.loadStaticModel(DatasetLoader.loadGoogleNewsVectors());
  final String[] vocabulary = (String[]) model.vocab().words().toArray(new String[0]);

  final int wordsPerSentence = 10;
  final Random rng = new Random(42);
  for (Instance instance : data) {
    final StringBuilder text = new StringBuilder();
    int remaining = wordsPerSentence;
    while (remaining > 0) {
      // Each word is prefixed by a space, so the sentence starts with one
      final String word = vocabulary[rng.nextInt(vocabulary.length)];
      text.append(" ").append(word);
      remaining--;
    }
    instance.setValue(0, text.toString());
  }
  return data;
}
 
Example 5
Source File: CnnTextEmbeddingInstanceIteratorTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a test dataset via {@code TestUtil.makeTestDataset} and sets
 * attribute 0 of every instance to a sentence of ten words drawn from the
 * vocabulary of the model returned by
 * {@code DatasetLoader.loadGoogleNewsVectors()}. The random generator uses a
 * fixed seed, so the sentences are reproducible.
 *
 * @return the dataset with the generated sentences written to attribute 0
 * @throws Exception if the dataset or the word vectors cannot be created
 */
public Instances makeData() throws Exception {
  final Instances data = TestUtil.makeTestDataset(
      42, 100, 0, 0, 1, 0, 0, 1, Attribute.NUMERIC, 1, false);

  final WordVectors model =
      WordVectorSerializer.loadStaticModel(DatasetLoader.loadGoogleNewsVectors());
  final String[] vocabulary = (String[]) model.vocab().words().toArray(new String[0]);

  final int wordsPerSentence = 10;
  final Random rng = new Random(42);
  for (Instance instance : data) {
    final StringBuilder text = new StringBuilder();
    int remaining = wordsPerSentence;
    while (remaining > 0) {
      // Each word is prefixed by a space, so the sentence starts with one
      final String word = vocabulary[rng.nextInt(vocabulary.length)];
      text.append(" ").append(word);
      remaining--;
    }
    instance.setValue(0, text.toString());
  }
  return data;
}
 
Example 6
Source File: AbstractTextEmbeddingIterator.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Loads the word vectors from {@code wordVectorLocation} unless they are
 * already present. The loader is chosen from the lower-cased file extension:
 * {@code .arff}, {@code .csv} and {@code .csv.gz} each have a dedicated
 * loader; any other file is handed to
 * {@code WordVectorSerializer.loadStaticModel}.
 *
 * @throws RuntimeException if the CSV loader reports failure
 */
public void initWordVectors() {
  if (wordVectors != null) {
    log.debug("Word vectors already loaded, skipping initialization.");
    return;
  }
  log.debug("Loading word vector model");

  final String absolutePath = wordVectorLocation.getAbsolutePath();
  final String normalizedPath = absolutePath.toLowerCase();

  if (normalizedPath.endsWith(".arff")) {
    // The ARFF loader receives the path in its original case
    loadEmbeddingFromArff(absolutePath);
    return;
  }

  if (normalizedPath.endsWith(".csv")) {
    final boolean loaded = loadEmbeddingFromCSV(wordVectorLocation);
    if (!loaded) {
      throw new RuntimeException("Could not load the word vector file.");
    }
    return;
  }

  if (normalizedPath.endsWith(".csv.gz")) {
    loadGZipped();
    return;
  }

  // No known extension matched: try loading the file as is
  wordVectors = WordVectorSerializer.loadStaticModel(wordVectorLocation);
}
 
Example 7
Source File: CnnTextFilesEmbeddingInstanceIteratorTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a test dataset via {@code TestUtil.makeTestDataset} and sets
 * attribute 0 of every instance to a sentence of ten words drawn from the
 * vocabulary of the model returned by
 * {@code DatasetLoader.loadGoogleNewsVectors()}. The random generator uses a
 * fixed seed, so the sentences are reproducible.
 *
 * @return the dataset with the generated sentences written to attribute 0
 * @throws Exception if the dataset or the word vectors cannot be created
 */
public Instances makeData() throws Exception {
  final Instances data = TestUtil.makeTestDataset(
      42, 100, 0, 0, 1, 0, 0, 1, Attribute.NUMERIC, 1, false);

  final WordVectors model =
      WordVectorSerializer.loadStaticModel(DatasetLoader.loadGoogleNewsVectors());
  final String[] vocabulary = (String[]) model.vocab().words().toArray(new String[0]);

  final int wordsPerSentence = 10;
  final Random rng = new Random(42);
  for (Instance instance : data) {
    final StringBuilder text = new StringBuilder();
    int remaining = wordsPerSentence;
    while (remaining > 0) {
      // Each word is prefixed by a space, so the sentence starts with one
      final String word = vocabulary[rng.nextInt(vocabulary.length)];
      text.append(" ").append(word);
      remaining--;
    }
    instance.setValue(0, text.toString());
  }
  return data;
}
 
Example 8
Source File: CnnTextEmbeddingInstanceIteratorTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a test dataset via {@code TestUtil.makeTestDataset} and sets
 * attribute 0 of every instance to a sentence of ten words drawn from the
 * vocabulary of the model returned by
 * {@code DatasetLoader.loadGoogleNewsVectors()}. The random generator uses a
 * fixed seed, so the sentences are reproducible.
 *
 * @return the dataset with the generated sentences written to attribute 0
 * @throws Exception if the dataset or the word vectors cannot be created
 */
public Instances makeData() throws Exception {
  final Instances data = TestUtil.makeTestDataset(
      42, 100, 0, 0, 1, 0, 0, 1, Attribute.NUMERIC, 1, false);

  final WordVectors model =
      WordVectorSerializer.loadStaticModel(DatasetLoader.loadGoogleNewsVectors());
  final String[] vocabulary = (String[]) model.vocab().words().toArray(new String[0]);

  final int wordsPerSentence = 10;
  final Random rng = new Random(42);
  for (Instance instance : data) {
    final StringBuilder text = new StringBuilder();
    int remaining = wordsPerSentence;
    while (remaining > 0) {
      // Each word is prefixed by a space, so the sentence starts with one
      final String word = vocabulary[rng.nextInt(vocabulary.length)];
      text.append(" ").append(word);
      remaining--;
    }
    instance.setValue(0, text.toString());
  }
  return data;
}
 
Example 9
Source File: WordVectorSerializerTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Times the loading of the full Google News model as a static model and logs
 * the elapsed milliseconds.
 * Keep it ignored: it requires the full Google model (1.6gb compressed, per the
 * original note) to be present at a hard-coded local path.
 *
 * @throws Exception if the model cannot be loaded
 */
@Test
@Ignore
public void testStaticLoaderGoogleModel() throws Exception {
    final String executorName = Nd4j.getExecutioner().getClass().getSimpleName();
    logger.info("Executor name: {}", executorName);

    final File googleModel =
                    new File("C:\\Users\\raver\\develop\\GoogleNews-vectors-negative300.bin.gz");

    final long startedAt = System.currentTimeMillis();
    WordVectors vectors = WordVectorSerializer.loadStaticModel(googleModel);
    final long finishedAt = System.currentTimeMillis();

    logger.info("Loading time: {} ms", (finishedAt - startedAt));
}
 
Example 10
Source File: WordVectorSerializerTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the binary model file through {@code readWord2VecModel} and, via an
 * input stream, through {@code loadStaticModel}, and checks that both return
 * the same vector for a single lookup word.
 *
 * @throws Exception if the file cannot be opened or either loader fails
 */
@Test
@Ignore("AB 2019/06/24 - Failing: Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
public void testStaticLoaderFromStream() throws Exception {

    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    WordVectors vectorsLive = WordVectorSerializer.readWord2VecModel(binaryFile);

    // Open the stream in try-with-resources so the file handle is released
    // even if loading throws; the test no longer relies on the loader to close it.
    WordVectors vectorsStatic;
    try (FileInputStream stream = new FileInputStream(binaryFile)) {
        vectorsStatic = WordVectorSerializer.loadStaticModel(stream);
    }

    INDArray arrayLive = vectorsLive.getWordVectorMatrix("Morgan_Freeman");
    INDArray arrayStatic = vectorsStatic.getWordVectorMatrix("Morgan_Freeman");

    // The reference vector must exist before the comparison means anything
    assertNotEquals(null, arrayLive);
    assertEquals(arrayLive, arrayStatic);
}
 
Example 11
Source File: WordVectorSerializerTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the text model file (CSV, per the original note) through
 * {@code loadTxtVectors} and through {@code loadStaticModel}, and checks that
 * both return the same vector for a single lookup word.
 *
 * @throws Exception if either loader fails
 */
@Test
@Ignore("AB 2019/06/24 - Failing: Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
public void testStaticLoaderText() throws Exception {
    final String executorName = Nd4j.getExecutioner().getClass().getSimpleName();
    logger.info("Executor name: {}", executorName);

    // Word looked up in both models
    final String probeWord = "Morgan_Freeman";

    final WordVectors liveModel = WordVectorSerializer.loadTxtVectors(textFile);
    final WordVectors staticModel = WordVectorSerializer.loadStaticModel(textFile);

    final INDArray expected = liveModel.getWordVectorMatrix(probeWord);
    final INDArray actual = staticModel.getWordVectorMatrix(probeWord);

    // The reference vector must exist before the comparison means anything
    assertNotEquals(null, expected);
    assertEquals(expected, actual);
}