org.deeplearning4j.text.documentiterator.LabelAwareIterator Java Examples

The following examples show how to use org.deeplearning4j.text.documentiterator.LabelAwareIterator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Word2VecTestsSmall.java — from deeplearning4j (Apache License 2.0), 6 votes
@Test
public void testLabelAwareIterator_1() throws Exception {
    // Locate the bundled "/labeled" corpus directory on the classpath.
    val resource = new ClassPathResource("/labeled");
    val labeledDir = resource.getFile();

    // Build a folder-backed label-aware iterator, upcast to the interface type.
    val labelAwareIter = (LabelAwareIterator) new FileLabelAwareIterator.Builder()
            .addSourceFolder(labeledDir)
            .build();

    val tokenizerFactory = new DefaultTokenizerFactory();

    // Smoke test: constructing Word2Vec from a LabelAwareIterator must not throw.
    val w2v = new Word2Vec.Builder()
            .iterate(labelAwareIter)
            .tokenizerFactory(tokenizerFactory)
            .build();
}
 
Example #2
Source File: BasicTransformerIterator.java — from deeplearning4j (Apache License 2.0), 5 votes
public BasicTransformerIterator(@NonNull LabelAwareIterator iterator, @NonNull SentenceTransformer transformer) {
    // Single-threaded by design; a multithreaded variant exists as ParallelTransformerIterator.
    this.allowMultithreading = false;
    this.sentenceTransformer = transformer;
    this.iterator = iterator;

    // Rewind the underlying source so iteration always starts from the first document.
    this.iterator.reset();
}
 
Example #3
Source File: ParallelTransformerIterator.java — from deeplearning4j (Apache License 2.0), 5 votes
public ParallelTransformerIterator(@NonNull LabelAwareIterator iterator, @NonNull SentenceTransformer transformer,
                                   boolean allowMultithreading) {
    super(new AsyncLabelAwareIterator(iterator, 512), transformer);
    //super(iterator, transformer);
    this.allowMultithreading = allowMultithreading;
    //this.stringBuffer = new LinkedBlockingQueue<>(512);

    //threads = new TokenizerThread[1];
    //threads = new TokenizerThread[allowMultithreading ? Math.max(Runtime.getRuntime().availableProcessors(), 2) : 1];
    executorService = Executors.newFixedThreadPool(allowMultithreading ? Math.max(Runtime.getRuntime().availableProcessors(), 2) : 1);

    prefetchIterator();
}
 
Example #4
Source File: TfidfVectorizerTest.java — from deeplearning4j (Apache License 2.0), 5 votes
public void testTfIdfVectorizerFromLabelAwareIterator() throws Exception {
    // Two single-sentence documents, each tagged with one label.
    LabelledDocument dogDoc = new LabelledDocument();
    dogDoc.addLabel("dog");
    dogDoc.setContent("it barks like a dog");

    LabelledDocument catDoc = new LabelledDocument();
    catDoc.addLabel("cat");
    catDoc.setContent("it meows like a cat");

    List<LabelledDocument> corpus = new ArrayList<>(2);
    corpus.add(dogDoc);
    corpus.add(catDoc);

    LabelAwareIterator iterator = new SimpleLabelAwareIterator(corpus);
    TokenizerFactory tokenizerFactory = new DefaultTokenizerFactory();

    // Sequential tokenization keeps the run deterministic for this assertion-based test.
    TfidfVectorizer vectorizer = new TfidfVectorizer.Builder()
            .setMinWordFrequency(1)
            .setStopWords(new ArrayList<String>())
            .setTokenizerFactory(tokenizerFactory)
            .setIterator(iterator)
            .allowParallelTokenization(false)
            .build();

    vectorizer.fit();

    // Vectorizing a known sentence must produce a non-null dataset.
    DataSet dataset = vectorizer.vectorize("it meows like a cat", "cat");
    assertNotNull(dataset);

    // Both labels should be registered, in corpus order.
    LabelsSource source = vectorizer.getLabelsSource();
    assertEquals(2, source.getNumberOfLabelsUsed());
    List<String> labels = source.getLabels();
    assertEquals("dog", labels.get(0));
    assertEquals("cat", labels.get(1));
}
 
Example #5
Source File: SentenceTransformer.java — from deeplearning4j (Apache License 2.0), 4 votes
/**
 * Private constructor (instances are built via the enclosing builder):
 * stores the label-aware source this transformer reads documents from.
 *
 * @param iterator document source; must not be null
 */
private SentenceTransformer(@NonNull LabelAwareIterator iterator) {
    this.iterator = iterator;
}
 
Example #6
Source File: ParallelTransformerIterator.java — from deeplearning4j (Apache License 2.0), 4 votes
/**
 * Convenience constructor: delegates to the three-argument constructor
 * with multithreading enabled by default.
 *
 * @param iterator    document source; must not be null
 * @param transformer sentence transformer applied to each document; must not be null
 */
public ParallelTransformerIterator(@NonNull LabelAwareIterator iterator, @NonNull SentenceTransformer transformer) {
    this(iterator, transformer, true);
}
 
Example #7
Source File: TfidfVectorizer.java — from deeplearning4j (Apache License 2.0), 4 votes
public Builder setIterator(@NonNull LabelAwareIterator iterator) {
    // Wrap the supplied iterator so the labels it emits are tracked in this builder's LabelsSource.
    LabelAwareIteratorWrapper trackingIterator = new LabelAwareIteratorWrapper(iterator, labelsSource);
    this.iterator = trackingIterator;
    return this;
}
 
Example #8
Source File: BagOfWordsVectorizer.java — from deeplearning4j (Apache License 2.0), 4 votes
/**
 * Sets the document source for the vectorizer being built.
 *
 * @param iterator label-aware document source; must not be null
 * @return this builder, for chaining
 */
public Builder setIterator(@NonNull LabelAwareIterator iterator) {
    this.iterator = iterator;
    return this;
}
 
Example #9
Source File: LabelAwareConverter.java — from deeplearning4j (Apache License 2.0), 4 votes
public LabelAwareConverter(@NonNull LabelAwareIterator iterator, @NonNull List<String> labels) {
    // Keep the complete label list alongside the wrapped iterator.
    this.labels = labels;
    this.backingIterator = iterator;
}
 
Example #10
Source File: CnnSentenceDataSetIterator.java — from deeplearning4j (Apache License 2.0), 4 votes
/**
 * Specify how the (labelled) sentences / documents should be provided.
 *
 * @param iterator label-aware source of documents
 * @param labels   complete list of labels the iterator may emit; must not be null
 * @return this builder, for chaining
 */
public Builder sentenceProvider(LabelAwareIterator iterator, @NonNull List<String> labels) {
    // Adapt the label-aware iterator to the sentence-provider interface and delegate.
    return sentenceProvider(new LabelAwareConverter(iterator, labels));
}
 
Example #11
Source File: ParagraphVectorsTest.java — from deeplearning4j (Apache License 2.0), 4 votes
/**
 * This test is not indicative: the corpus is tiny, so run it manually
 * only when investigating problems — there is no need for it in CI.
 *
 * @throws Exception on any failure while preparing the corpus or fitting
 */
@Test
@Ignore
public void testParagraphVectorsReducedLabels1() throws Exception {
    // Copy the classpath corpus into a fresh temporary folder.
    val tempDir = testDir.newFolder();
    new ClassPathResource("/labeled").copyDirectory(tempDir);

    LabelAwareIterator iter = new FileLabelAwareIterator.Builder().addSourceFolder(tempDir).build();
    TokenizerFactory t = new DefaultTokenizerFactory();

    // NOTE: the text corpus is REALLY small; some kind of "results" could be
    // obtained with a HIGH epoch count (e.g. 30), but there is no reason to go that high here.
    ParagraphVectors vec = new ParagraphVectors.Builder()
            .minWordFrequency(1)
            .epochs(3)
            .layerSize(100)
            .stopWords(new ArrayList<String>())
            .windowSize(5)
            .iterate(iter)
            .tokenizerFactory(t)
            .build();

    vec.fit();

    // Stack the word vectors for "I", "am", "sad." into one matrix and average them row-wise.
    INDArray words = Nd4j.create(3, vec.lookupTable().layerSize());
    words.putRow(0, vec.lookupTable().vector("I"));
    words.putRow(1, vec.lookupTable().vector("am"));
    words.putRow(2, vec.lookupTable().vector("sad."));

    INDArray mean = words.isMatrix() ? words.mean(0) : words;

    log.info("Mean" + Arrays.toString(mean.dup().data().asDouble()));
    log.info("Array" + Arrays.toString(vec.lookupTable().vector("negative").dup().data().asDouble()));

    // Compare the averaged sentence vector against each label vector.
    double simN = Transforms.cosineSim(mean, vec.lookupTable().vector("negative"));
    log.info("Similarity negative: " + simN);

    double simP = Transforms.cosineSim(mean, vec.lookupTable().vector("neutral"));
    log.info("Similarity neutral: " + simP);

    double simV = Transforms.cosineSim(mean, vec.lookupTable().vector("positive"));
    log.info("Similarity positive: " + simV);
}
 
Example #12
Source File: Word2Vec.java — from deeplearning4j (Apache License 2.0), 2 votes
/**
 * Sets the {@link LabelAwareIterator} this Word2Vec model will read its
 * training corpus from.
 *
 * @param iterator label-aware document source; must not be null
 * @return this builder, for chaining
 */
public Builder iterate(@NonNull LabelAwareIterator iterator) {
    this.labelAwareIterator = iterator;
    return this;
}