Java Code Examples for org.deeplearning4j.models.embeddings.wordvectors.WordVectors#getWordVector()

The following examples show how to use org.deeplearning4j.models.embeddings.wordvectors.WordVectors#getWordVector(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: RnnTextEmbeddingDataSetIterator.java    From wekaDeeplearning4j with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Constructor with necessary objects to create RNN features.
 *
 * @param data Instances with documents and labels
 * @param wordVectors WordVectors object
 * @param tokenFact Tokenizer factory
 * @param tpp Token pre processor
 * @param stopWords Stop word object
 * @param batchSize Size of each minibatch for training
 * @param truncateLength If reviews exceed
 */
public RnnTextEmbeddingDataSetIterator(
    Instances data,
    WordVectors wordVectors,
    TokenizerFactory tokenFact,
    TokenPreProcess tpp,
    AbstractStopwords stopWords,
    LabeledSentenceProvider sentenceProvider,
    int batchSize,
    int truncateLength) {
  this.batchSize = batchSize;
  this.wordVectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;
  this.data = data;
  this.wordVectors = wordVectors;
  this.truncateLength = truncateLength;
  this.tokenizerFactory = tokenFact;
  this.tokenizerFactory.getBackend().setTokenPreProcessor(tpp.getBackend());
  this.stopWords = stopWords;
  this.sentenceProvider = sentenceProvider;
}
 
Example 2
Source File: RnnTextEmbeddingDataSetIterator.java    From wekaDeeplearning4j with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Constructor with necessary objects to create RNN features.
 *
 * @param data Instances with documents and labels
 * @param wordVectors WordVectors object
 * @param tokenFact Tokenizer factory
 * @param tpp Token pre processor
 * @param stopWords Stop word object
 * @param batchSize Size of each minibatch for training
 * @param truncateLength If reviews exceed
 */
public RnnTextEmbeddingDataSetIterator(
    Instances data,
    WordVectors wordVectors,
    TokenizerFactory tokenFact,
    TokenPreProcess tpp,
    AbstractStopwords stopWords,
    LabeledSentenceProvider sentenceProvider,
    int batchSize,
    int truncateLength) {
  this.batchSize = batchSize;
  this.wordVectorSize = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;
  this.data = data;
  this.wordVectors = wordVectors;
  this.truncateLength = truncateLength;
  this.tokenizerFactory = tokenFact;
  this.tokenizerFactory.getBackend().setTokenPreProcessor(tpp.getBackend());
  this.stopWords = stopWords;
  this.sentenceProvider = sentenceProvider;
}
 
Example 3
Source File: WordVectorSerializerTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips word vectors through the text format and checks two known entries.
 *
 * <p>Reads the model from {@code binaryFile}, writes its lookup table and vocabulary to
 * {@code pathToWriteto}, reloads that file with {@code loadTxtVectors}, and asserts the length
 * and first component of the vectors for "Morgan_Freeman" and "JA_Montalbano".
 *
 * @throws IOException declared for the serializer calls used below
 */
@Test
@Ignore
public void testWriteWordVectors() throws IOException {
    WordVectors vec = WordVectorSerializer.readWord2VecModel(binaryFile);
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
    InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();
    WordVectorSerializer.writeWordVectors(lookupTable, lookupCache, pathToWriteto);

    WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File(pathToWriteto));
    double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
    double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");

    // assertEquals takes (expected, actual[, delta]); keeping that order makes failure
    // messages report the real vector length / component as the "actual" value.
    assertEquals(300, wordVector1.length);
    assertEquals(300, wordVector2.length);
    assertEquals(0.044423, wordVector1[0], 1e-3);
    assertEquals(0.051964, wordVector2[0], 1e-3);
}
 
Example 4
Source File: WordVectorSerializerTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Rebuilds a {@code WordVectors} instance from a lookup table and vocabulary and checks two
 * known entries.
 *
 * <p>Reads the model from {@code textFile}, passes its lookup table and vocabulary to
 * {@code fromTableAndVocab}, and asserts the length and first component of the vectors for
 * "Morgan_Freeman" and "JA_Montalbano".
 *
 * @throws IOException declared for the serializer calls used below
 */
@Test
@Ignore
public void testFromTableAndVocab() throws IOException {

    WordVectors vec = WordVectorSerializer.readWord2VecModel(textFile);
    InMemoryLookupTable lookupTable = (InMemoryLookupTable) vec.lookupTable();
    InMemoryLookupCache lookupCache = (InMemoryLookupCache) vec.vocab();

    WordVectors wordVectors = WordVectorSerializer.fromTableAndVocab(lookupTable, lookupCache);
    double[] wordVector1 = wordVectors.getWordVector("Morgan_Freeman");
    double[] wordVector2 = wordVectors.getWordVector("JA_Montalbano");

    // assertEquals takes (expected, actual[, delta]); keeping that order makes failure
    // messages report the real vector length / component as the "actual" value.
    assertEquals(300, wordVector1.length);
    assertEquals(300, wordVector2.length);
    assertEquals(0.044423, wordVector1[0], 1e-3);
    assertEquals(0.051964, wordVector2[0], 1e-3);
}
 
Example 5
Source File: Dl4jMlpTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Configures {@code clf} with a text-files embedding iterator and a stack of two convolution
 * layers, a global pooling layer and an MSE/identity output layer, then hands the classifier
 * and the data from {@code DatasetLoader.loadAngerMeta()} to {@code TestUtil.holdout}.
 */
@Test
public void testTextCnnTextFilesRegression() throws Exception {
  // Iterator: batch size 64, word vectors and text directory supplied by DatasetLoader.
  final CnnTextFilesEmbeddingInstanceIterator textFilesIterator =
      new CnnTextFilesEmbeddingInstanceIterator();
  textFilesIterator.setTrainBatchSize(64);
  textFilesIterator.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
  textFilesIterator.setTextsLocation(DatasetLoader.loadAngerFilesDir());
  clf.setInstanceIterator(textFilesIterator);

  // The embedding dimension is read off the vector of the vocabulary word at index 0.
  textFilesIterator.initialize();
  final WordVectors embeddings = textFilesIterator.getWordVectors();
  final double[] probeVector = embeddings.getWordVector(embeddings.vocab().wordAtIndex(0));
  final int embeddingDim = probeVector.length;

  // First convolution: kernel {3, dim}, stride {1, dim}.
  final ConvolutionLayer firstConv = new ConvolutionLayer();
  final int[] firstKernel = {3, embeddingDim};
  firstConv.setKernelSize(firstKernel);
  firstConv.setNOut(10);
  final int[] firstStride = {1, embeddingDim};
  firstConv.setStride(firstStride);
  firstConv.setConvolutionMode(ConvolutionMode.Same);

  // Second convolution: kernel {2, dim}, stride {1, dim}.
  final ConvolutionLayer secondConv = new ConvolutionLayer();
  final int[] secondKernel = {2, embeddingDim};
  secondConv.setKernelSize(secondKernel);
  secondConv.setNOut(10);
  final int[] secondStride = {1, embeddingDim};
  secondConv.setStride(secondStride);
  secondConv.setConvolutionMode(ConvolutionMode.Same);

  final GlobalPoolingLayer pooling = new GlobalPoolingLayer();

  // Output layer with MSE loss and identity activation.
  final OutputLayer regressionOut = new OutputLayer();
  regressionOut.setLossFn(new LossMSE());
  regressionOut.setActivationFunction(new ActivationIdentity());

  clf.setLayers(firstConv, secondConv, pooling, regressionOut);
  clf.setCacheMode(CacheMode.MEMORY);

  final Instances angerMeta = DatasetLoader.loadAngerMeta();
  TestUtil.holdout(clf, angerMeta);
}
 
Example 6
Source File: Dl4jMlpTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Configures {@code clf} with a text-files embedding iterator and a stack of two convolution
 * layers, a global pooling layer and an MSE/identity output layer, then hands the classifier
 * and the data from {@code DatasetLoader.loadAngerMeta()} to {@code TestUtil.holdout}.
 */
@Test
public void testTextCnnTextFilesRegression() throws Exception {
  // Iterator: batch size 64, word vectors and text directory supplied by DatasetLoader.
  final CnnTextFilesEmbeddingInstanceIterator textFilesIterator =
      new CnnTextFilesEmbeddingInstanceIterator();
  textFilesIterator.setTrainBatchSize(64);
  textFilesIterator.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
  textFilesIterator.setTextsLocation(DatasetLoader.loadAngerFilesDir());
  clf.setInstanceIterator(textFilesIterator);

  // The embedding dimension is read off the vector of the vocabulary word at index 0.
  textFilesIterator.initialize();
  final WordVectors embeddings = textFilesIterator.getWordVectors();
  final double[] probeVector = embeddings.getWordVector(embeddings.vocab().wordAtIndex(0));
  final int embeddingDim = probeVector.length;

  // First convolution: kernel {3, dim}, stride {1, dim}.
  final ConvolutionLayer firstConv = new ConvolutionLayer();
  final int[] firstKernel = {3, embeddingDim};
  firstConv.setKernelSize(firstKernel);
  firstConv.setNOut(10);
  final int[] firstStride = {1, embeddingDim};
  firstConv.setStride(firstStride);
  firstConv.setConvolutionMode(ConvolutionMode.Same);

  // Second convolution: kernel {2, dim}, stride {1, dim}.
  final ConvolutionLayer secondConv = new ConvolutionLayer();
  final int[] secondKernel = {2, embeddingDim};
  secondConv.setKernelSize(secondKernel);
  secondConv.setNOut(10);
  final int[] secondStride = {1, embeddingDim};
  secondConv.setStride(secondStride);
  secondConv.setConvolutionMode(ConvolutionMode.Same);

  final GlobalPoolingLayer pooling = new GlobalPoolingLayer();

  // Output layer with MSE loss and identity activation.
  final OutputLayer regressionOut = new OutputLayer();
  regressionOut.setLossFn(new LossMSE());
  regressionOut.setActivationFunction(new ActivationIdentity());

  clf.setLayers(firstConv, secondConv, pooling, regressionOut);
  clf.setCacheMode(CacheMode.MEMORY);

  final Instances angerMeta = DatasetLoader.loadAngerMeta();
  TestUtil.holdout(clf, angerMeta);
}
 
Example 7
Source File: WordVectorSerializerTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the model from {@code binaryFile} and checks its vocabulary and two known vectors.
 *
 * <p>Asserts that the vocabulary reports 30 words and contains "Morgan_Freeman" and
 * "JA_Montalbano", and that both vectors have length 300 with the expected first component.
 *
 * @throws IOException declared for the serializer call used below
 */
@Test
public void testLoaderBinary() throws IOException {
    WordVectors vec = WordVectorSerializer.readWord2VecModel(binaryFile);

    // assertEquals takes (expected, actual[, delta]); keeping that order makes failure
    // messages report the value produced by the loader as the "actual" value.
    assertEquals(30, vec.vocab().numWords());
    assertTrue(vec.vocab().hasToken("Morgan_Freeman"));
    assertTrue(vec.vocab().hasToken("JA_Montalbano"));

    double[] wordVector1 = vec.getWordVector("Morgan_Freeman");
    double[] wordVector2 = vec.getWordVector("JA_Montalbano");
    assertEquals(300, wordVector1.length);
    assertEquals(300, wordVector2.length);
    assertEquals(0.044423, wordVector1[0], 1e-3);
    assertEquals(0.051964, wordVector2[0], 1e-3);
}
 
Example 8
Source File: Dl4jMlpTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Configures {@code clf} with a text embedding iterator, three ReLU convolution layers, a
 * global pooling layer and a default output layer, applies L2 and dropout settings, and hands
 * the classifier plus a filtered copy of the data from {@code DatasetLoader.loadImdb()} to
 * {@code TestUtil.holdout}.
 */
@Test
public void testTextCnnClassification() throws Exception {
  // Iterator: batch size 128, word vectors supplied by DatasetLoader.
  final CnnTextEmbeddingInstanceIterator embeddingIterator =
      new CnnTextEmbeddingInstanceIterator();
  embeddingIterator.setTrainBatchSize(128);
  embeddingIterator.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
  clf.setInstanceIterator(embeddingIterator);

  // The embedding dimension is read off the vector of the vocabulary word at index 0.
  embeddingIterator.initialize();
  final WordVectors embeddings = embeddingIterator.getWordVectors();
  final double[] probeVector = embeddings.getWordVector(embeddings.vocab().wordAtIndex(0));
  final int embeddingDim = probeVector.length;

  // First convolution: kernel {4, dim}, stride {1, dim}, ReLU.
  final ConvolutionLayer firstConv = new ConvolutionLayer();
  final int[] firstKernel = {4, embeddingDim};
  firstConv.setKernelSize(firstKernel);
  firstConv.setNOut(10);
  final int[] firstStride = {1, embeddingDim};
  firstConv.setStride(firstStride);
  firstConv.setConvolutionMode(ConvolutionMode.Same);
  firstConv.setActivationFunction(new ActivationReLU());

  // The batch-normalization layers are instantiated but not passed to setLayers below;
  // only the commented-out layer stack refers to them.
  BatchNormalization bn1 = new BatchNormalization();

  // Second convolution: kernel {3, dim}, stride {1, dim}, ReLU.
  final ConvolutionLayer secondConv = new ConvolutionLayer();
  final int[] secondKernel = {3, embeddingDim};
  secondConv.setKernelSize(secondKernel);
  secondConv.setNOut(10);
  final int[] secondStride = {1, embeddingDim};
  secondConv.setStride(secondStride);
  secondConv.setConvolutionMode(ConvolutionMode.Same);
  secondConv.setActivationFunction(new ActivationReLU());

  BatchNormalization bn2 = new BatchNormalization();

  // Third convolution: kernel {2, dim}, stride {1, dim}, ReLU.
  final ConvolutionLayer thirdConv = new ConvolutionLayer();
  final int[] thirdKernel = {2, embeddingDim};
  thirdConv.setKernelSize(thirdKernel);
  thirdConv.setNOut(10);
  final int[] thirdStride = {1, embeddingDim};
  thirdConv.setStride(thirdStride);
  thirdConv.setConvolutionMode(ConvolutionMode.Same);
  thirdConv.setActivationFunction(new ActivationReLU());

  BatchNormalization bn3 = new BatchNormalization();

  final GlobalPoolingLayer pooling = new GlobalPoolingLayer();

  final OutputLayer classificationOut = new OutputLayer();

  // Alternative stack with batch normalization (disabled):
  //    clf.setLayers(conv1, bn1, conv2, bn2, conv3, bn3, gpl, out);
  clf.setLayers(firstConv, secondConv, thirdConv, pooling, classificationOut);
  //    clf.setNumEpochs(50);
  clf.setCacheMode(CacheMode.MEMORY);

  // Epoch listener with N = 1.
  final EpochListener epochListener = new EpochListener();
  epochListener.setN(1);
  clf.setIterationListener(epochListener);

  final EarlyStopping earlyStopping = new EarlyStopping(10, 15);
  clf.setEarlyStopping(earlyStopping);
  clf.setDebug(true);

  // Network configuration: L2 of 1e-3 and a Dropout with P = 0.2.
  final NeuralNetConfiguration netConfig = new NeuralNetConfiguration();
  netConfig.setL2(1e-3);
  final Dropout dropoutSetting = new Dropout();
  dropoutSetting.setP(0.2);
  netConfig.setDropout(dropoutSetting);
  clf.setNeuralNetConfiguration(netConfig);

  // Data: shuffle with a fixed seed, then run the RemovePercentage filter set to 98.
  final Instances imdb = DatasetLoader.loadImdb();
  imdb.randomize(new Random(42));
  final RemovePercentage removePercentage = new RemovePercentage();
  removePercentage.setInputFormat(imdb);
  removePercentage.setPercentage(98);
  final Instances reducedImdb = Filter.useFilter(imdb, removePercentage);

  TestUtil.holdout(clf, reducedImdb);
}
 
Example 9
Source File: Dl4jMlpTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Configures {@code clf} with a text embedding iterator, two convolution layers, a global
 * pooling layer and an MSE/identity output layer, applies L2 and an Adam updater, and hands
 * the classifier and the data from {@code DatasetLoader.loadAnger()} to
 * {@code TestUtil.holdout}.
 */
@Test
public void testTextCnnRegression() throws Exception {
  // Iterator: batch size 64, word vectors supplied by DatasetLoader.
  final CnnTextEmbeddingInstanceIterator embeddingIterator =
      new CnnTextEmbeddingInstanceIterator();
  embeddingIterator.setTrainBatchSize(64);
  embeddingIterator.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
  clf.setInstanceIterator(embeddingIterator);

  // The embedding dimension is read off the vector of the vocabulary word at index 0.
  embeddingIterator.initialize();
  final WordVectors embeddings = embeddingIterator.getWordVectors();
  final double[] probeVector = embeddings.getWordVector(embeddings.vocab().wordAtIndex(0));
  final int embeddingDim = probeVector.length;

  // First convolution: kernel {3, dim}, stride {1, dim}.
  final ConvolutionLayer firstConv = new ConvolutionLayer();
  final int[] firstKernel = {3, embeddingDim};
  firstConv.setKernelSize(firstKernel);
  firstConv.setNOut(10);
  final int[] firstStride = {1, embeddingDim};
  firstConv.setStride(firstStride);
  firstConv.setConvolutionMode(ConvolutionMode.Same);

  // Second convolution: kernel {2, dim}, stride {1, dim}.
  final ConvolutionLayer secondConv = new ConvolutionLayer();
  final int[] secondKernel = {2, embeddingDim};
  secondConv.setKernelSize(secondKernel);
  secondConv.setNOut(10);
  final int[] secondStride = {1, embeddingDim};
  secondConv.setStride(secondStride);
  secondConv.setConvolutionMode(ConvolutionMode.Same);

  final GlobalPoolingLayer pooling = new GlobalPoolingLayer();

  // Output layer with MSE loss and identity activation.
  final OutputLayer regressionOut = new OutputLayer();
  regressionOut.setLossFn(new LossMSE());
  regressionOut.setActivationFunction(new ActivationIdentity());

  clf.setLayers(firstConv, secondConv, pooling, regressionOut);
  //    clf.setNumEpochs(200);
  clf.setCacheMode(CacheMode.MEMORY);

  // Epoch listener with N = 20.
  final EpochListener epochListener = new EpochListener();
  epochListener.setN(20);
  clf.setIterationListener(epochListener);
  clf.setDebug(true);
  final EarlyStopping earlyStopping = new EarlyStopping(0, 10);
  clf.setEarlyStopping(earlyStopping);

  final Instances anger = DatasetLoader.loadAnger();

  // Network configuration: L2 of 0.00001 and Adam with learning rate 0.001.
  final NeuralNetConfiguration netConfig = new NeuralNetConfiguration();
  netConfig.setL2(0.00001);
  final Adam adam = new Adam();
  adam.setLearningRate(0.001);
  netConfig.setUpdater(adam);

  clf.setNeuralNetConfiguration(netConfig);
  TestUtil.holdout(clf, anger);
}
 
Example 10
Source File: Dl4jMlpTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Configures {@code clf} with a text-files embedding iterator, two ReLU convolution layers, a
 * global pooling layer and a default output layer, applies a dropout setting, and hands the
 * classifier and the data from {@code DatasetLoader.loadAngerMetaClassification()} to
 * {@code TestUtil.holdout}.
 */
@Test
public void testTextCnnTextFilesClassification() throws Exception {
  // Iterator: batch size 64, word vectors and text directory supplied by DatasetLoader.
  final CnnTextFilesEmbeddingInstanceIterator textFilesIterator =
      new CnnTextFilesEmbeddingInstanceIterator();
  textFilesIterator.setTrainBatchSize(64);
  textFilesIterator.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
  textFilesIterator.setTextsLocation(DatasetLoader.loadAngerFilesDir());
  clf.setInstanceIterator(textFilesIterator);

  // The embedding dimension is read off the vector of the vocabulary word at index 0.
  textFilesIterator.initialize();
  final WordVectors embeddings = textFilesIterator.getWordVectors();
  final double[] probeVector = embeddings.getWordVector(embeddings.vocab().wordAtIndex(0));
  final int embeddingDim = probeVector.length;

  // First convolution: kernel {4, dim}, stride {1, dim}, ReLU.
  final ConvolutionLayer firstConv = new ConvolutionLayer();
  final int[] firstKernel = {4, embeddingDim};
  firstConv.setKernelSize(firstKernel);
  firstConv.setNOut(10);
  final int[] firstStride = {1, embeddingDim};
  firstConv.setStride(firstStride);
  firstConv.setConvolutionMode(ConvolutionMode.Same);
  firstConv.setActivationFunction(new ActivationReLU());

  // Second convolution: kernel {3, dim}, stride {1, dim}, ReLU.
  final ConvolutionLayer secondConv = new ConvolutionLayer();
  final int[] secondKernel = {3, embeddingDim};
  secondConv.setKernelSize(secondKernel);
  secondConv.setNOut(10);
  final int[] secondStride = {1, embeddingDim};
  secondConv.setStride(secondStride);
  secondConv.setConvolutionMode(ConvolutionMode.Same);
  secondConv.setActivationFunction(new ActivationReLU());

  final GlobalPoolingLayer pooling = new GlobalPoolingLayer();

  final OutputLayer classificationOut = new OutputLayer();

  // Network configuration: a Dropout with P = 0.2.
  final NeuralNetConfiguration netConfig = new NeuralNetConfiguration();
  final Dropout dropoutSetting = new Dropout();
  dropoutSetting.setP(0.2);
  netConfig.setDropout(dropoutSetting);
  clf.setNeuralNetConfiguration(netConfig);

  clf.setLayers(firstConv, secondConv, pooling, classificationOut);
  clf.setCacheMode(CacheMode.MEMORY);

  final Instances angerMeta = DatasetLoader.loadAngerMetaClassification();
  TestUtil.holdout(clf, angerMeta);
}
 
Example 11
Source File: Dl4jMlpTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Configures {@code clf} with a text embedding iterator, three ReLU convolution layers, a
 * global pooling layer and a default output layer, applies L2 and dropout settings, and hands
 * the classifier plus a filtered copy of the data from {@code DatasetLoader.loadImdb()} to
 * {@code TestUtil.holdout}.
 */
@Test
public void testTextCnnClassification() throws Exception {
  // Iterator: batch size 128, word vectors supplied by DatasetLoader.
  final CnnTextEmbeddingInstanceIterator embeddingIterator =
      new CnnTextEmbeddingInstanceIterator();
  embeddingIterator.setTrainBatchSize(128);
  embeddingIterator.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
  clf.setInstanceIterator(embeddingIterator);

  // The embedding dimension is read off the vector of the vocabulary word at index 0.
  embeddingIterator.initialize();
  final WordVectors embeddings = embeddingIterator.getWordVectors();
  final double[] probeVector = embeddings.getWordVector(embeddings.vocab().wordAtIndex(0));
  final int embeddingDim = probeVector.length;

  // First convolution: kernel {4, dim}, stride {1, dim}, ReLU.
  final ConvolutionLayer firstConv = new ConvolutionLayer();
  final int[] firstKernel = {4, embeddingDim};
  firstConv.setKernelSize(firstKernel);
  firstConv.setNOut(10);
  final int[] firstStride = {1, embeddingDim};
  firstConv.setStride(firstStride);
  firstConv.setConvolutionMode(ConvolutionMode.Same);
  firstConv.setActivationFunction(new ActivationReLU());

  // The batch-normalization layers are instantiated but not passed to setLayers below;
  // only the commented-out layer stack refers to them.
  BatchNormalization bn1 = new BatchNormalization();

  // Second convolution: kernel {3, dim}, stride {1, dim}, ReLU.
  final ConvolutionLayer secondConv = new ConvolutionLayer();
  final int[] secondKernel = {3, embeddingDim};
  secondConv.setKernelSize(secondKernel);
  secondConv.setNOut(10);
  final int[] secondStride = {1, embeddingDim};
  secondConv.setStride(secondStride);
  secondConv.setConvolutionMode(ConvolutionMode.Same);
  secondConv.setActivationFunction(new ActivationReLU());

  BatchNormalization bn2 = new BatchNormalization();

  // Third convolution: kernel {2, dim}, stride {1, dim}, ReLU.
  final ConvolutionLayer thirdConv = new ConvolutionLayer();
  final int[] thirdKernel = {2, embeddingDim};
  thirdConv.setKernelSize(thirdKernel);
  thirdConv.setNOut(10);
  final int[] thirdStride = {1, embeddingDim};
  thirdConv.setStride(thirdStride);
  thirdConv.setConvolutionMode(ConvolutionMode.Same);
  thirdConv.setActivationFunction(new ActivationReLU());

  BatchNormalization bn3 = new BatchNormalization();

  final GlobalPoolingLayer pooling = new GlobalPoolingLayer();

  final OutputLayer classificationOut = new OutputLayer();

  // Alternative stack with batch normalization (disabled):
  //    clf.setLayers(conv1, bn1, conv2, bn2, conv3, bn3, gpl, out);
  clf.setLayers(firstConv, secondConv, thirdConv, pooling, classificationOut);
  //    clf.setNumEpochs(50);
  clf.setCacheMode(CacheMode.MEMORY);

  // Epoch listener with N = 1.
  final EpochListener epochListener = new EpochListener();
  epochListener.setN(1);
  clf.setIterationListener(epochListener);

  final EarlyStopping earlyStopping = new EarlyStopping(10, 15);
  clf.setEarlyStopping(earlyStopping);
  clf.setDebug(true);

  // Network configuration: L2 of 1e-3 and a Dropout with P = 0.2.
  final NeuralNetConfiguration netConfig = new NeuralNetConfiguration();
  netConfig.setL2(1e-3);
  final Dropout dropoutSetting = new Dropout();
  dropoutSetting.setP(0.2);
  netConfig.setDropout(dropoutSetting);
  clf.setNeuralNetConfiguration(netConfig);

  // Data: shuffle with a fixed seed, then run the RemovePercentage filter set to 98.
  final Instances imdb = DatasetLoader.loadImdb();
  imdb.randomize(new Random(42));
  final RemovePercentage removePercentage = new RemovePercentage();
  removePercentage.setInputFormat(imdb);
  removePercentage.setPercentage(98);
  final Instances reducedImdb = Filter.useFilter(imdb, removePercentage);

  TestUtil.holdout(clf, reducedImdb);
}
 
Example 12
Source File: Dl4jMlpTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Configures {@code clf} with a text embedding iterator, two convolution layers, a global
 * pooling layer and an MSE/identity output layer, applies L2 and an Adam updater, and hands
 * the classifier and the data from {@code DatasetLoader.loadAnger()} to
 * {@code TestUtil.holdout}.
 */
@Test
public void testTextCnnRegression() throws Exception {
  // Iterator: batch size 64, word vectors supplied by DatasetLoader.
  final CnnTextEmbeddingInstanceIterator embeddingIterator =
      new CnnTextEmbeddingInstanceIterator();
  embeddingIterator.setTrainBatchSize(64);
  embeddingIterator.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
  clf.setInstanceIterator(embeddingIterator);

  // The embedding dimension is read off the vector of the vocabulary word at index 0.
  embeddingIterator.initialize();
  final WordVectors embeddings = embeddingIterator.getWordVectors();
  final double[] probeVector = embeddings.getWordVector(embeddings.vocab().wordAtIndex(0));
  final int embeddingDim = probeVector.length;

  // First convolution: kernel {3, dim}, stride {1, dim}.
  final ConvolutionLayer firstConv = new ConvolutionLayer();
  final int[] firstKernel = {3, embeddingDim};
  firstConv.setKernelSize(firstKernel);
  firstConv.setNOut(10);
  final int[] firstStride = {1, embeddingDim};
  firstConv.setStride(firstStride);
  firstConv.setConvolutionMode(ConvolutionMode.Same);

  // Second convolution: kernel {2, dim}, stride {1, dim}.
  final ConvolutionLayer secondConv = new ConvolutionLayer();
  final int[] secondKernel = {2, embeddingDim};
  secondConv.setKernelSize(secondKernel);
  secondConv.setNOut(10);
  final int[] secondStride = {1, embeddingDim};
  secondConv.setStride(secondStride);
  secondConv.setConvolutionMode(ConvolutionMode.Same);

  final GlobalPoolingLayer pooling = new GlobalPoolingLayer();

  // Output layer with MSE loss and identity activation.
  final OutputLayer regressionOut = new OutputLayer();
  regressionOut.setLossFn(new LossMSE());
  regressionOut.setActivationFunction(new ActivationIdentity());

  clf.setLayers(firstConv, secondConv, pooling, regressionOut);
  //    clf.setNumEpochs(200);
  clf.setCacheMode(CacheMode.MEMORY);

  // Epoch listener with N = 20.
  final EpochListener epochListener = new EpochListener();
  epochListener.setN(20);
  clf.setIterationListener(epochListener);
  clf.setDebug(true);
  final EarlyStopping earlyStopping = new EarlyStopping(0, 10);
  clf.setEarlyStopping(earlyStopping);

  final Instances anger = DatasetLoader.loadAnger();

  // Network configuration: L2 of 0.00001 and Adam with learning rate 0.001.
  final NeuralNetConfiguration netConfig = new NeuralNetConfiguration();
  netConfig.setL2(0.00001);
  final Adam adam = new Adam();
  adam.setLearningRate(0.001);
  netConfig.setUpdater(adam);

  clf.setNeuralNetConfiguration(netConfig);
  TestUtil.holdout(clf, anger);
}
 
Example 13
Source File: Dl4jMlpTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Configures {@code clf} with a text-files embedding iterator, two ReLU convolution layers, a
 * global pooling layer and a default output layer, applies a dropout setting, and hands the
 * classifier and the data from {@code DatasetLoader.loadAngerMetaClassification()} to
 * {@code TestUtil.holdout}.
 */
@Test
public void testTextCnnTextFilesClassification() throws Exception {
  // Iterator: batch size 64, word vectors and text directory supplied by DatasetLoader.
  final CnnTextFilesEmbeddingInstanceIterator textFilesIterator =
      new CnnTextFilesEmbeddingInstanceIterator();
  textFilesIterator.setTrainBatchSize(64);
  textFilesIterator.setWordVectorLocation(DatasetLoader.loadGoogleNewsVectors());
  textFilesIterator.setTextsLocation(DatasetLoader.loadAngerFilesDir());
  clf.setInstanceIterator(textFilesIterator);

  // The embedding dimension is read off the vector of the vocabulary word at index 0.
  textFilesIterator.initialize();
  final WordVectors embeddings = textFilesIterator.getWordVectors();
  final double[] probeVector = embeddings.getWordVector(embeddings.vocab().wordAtIndex(0));
  final int embeddingDim = probeVector.length;

  // First convolution: kernel {4, dim}, stride {1, dim}, ReLU.
  final ConvolutionLayer firstConv = new ConvolutionLayer();
  final int[] firstKernel = {4, embeddingDim};
  firstConv.setKernelSize(firstKernel);
  firstConv.setNOut(10);
  final int[] firstStride = {1, embeddingDim};
  firstConv.setStride(firstStride);
  firstConv.setConvolutionMode(ConvolutionMode.Same);
  firstConv.setActivationFunction(new ActivationReLU());

  // Second convolution: kernel {3, dim}, stride {1, dim}, ReLU.
  final ConvolutionLayer secondConv = new ConvolutionLayer();
  final int[] secondKernel = {3, embeddingDim};
  secondConv.setKernelSize(secondKernel);
  secondConv.setNOut(10);
  final int[] secondStride = {1, embeddingDim};
  secondConv.setStride(secondStride);
  secondConv.setConvolutionMode(ConvolutionMode.Same);
  secondConv.setActivationFunction(new ActivationReLU());

  final GlobalPoolingLayer pooling = new GlobalPoolingLayer();

  final OutputLayer classificationOut = new OutputLayer();

  // Network configuration: a Dropout with P = 0.2.
  final NeuralNetConfiguration netConfig = new NeuralNetConfiguration();
  final Dropout dropoutSetting = new Dropout();
  dropoutSetting.setP(0.2);
  netConfig.setDropout(dropoutSetting);
  clf.setNeuralNetConfiguration(netConfig);

  clf.setLayers(firstConv, secondConv, pooling, classificationOut);
  clf.setCacheMode(CacheMode.MEMORY);

  final Instances angerMeta = DatasetLoader.loadAngerMetaClassification();
  TestUtil.holdout(clf, angerMeta);
}