Java Code Examples for org.deeplearning4j.text.documentiterator.LabelledDocument#getReferencedContent()

The following examples show how to use org.deeplearning4j.text.documentiterator.LabelledDocument#getReferencedContent(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: DocumentSequenceConvertFunction.java, from deeplearning4j (Apache License 2.0), 5 votes
/**
 * Builds a {@code Sequence<VocabWord>} from a labelled document.
 *
 * <p>If the document already carries non-empty referenced content, those elements are
 * used as-is. Otherwise the raw content is run through the tokenizer factory (created
 * on first use) and every non-null, non-empty token becomes a new {@code VocabWord}.
 * Afterwards each non-null, non-empty label of the document is attached to the
 * sequence as a label element.
 *
 * @param document the document to convert
 * @return the sequence holding the document's elements and labels
 * @throws Exception declared by the overridden method's signature
 */
@Override
public Sequence<VocabWord> call(LabelledDocument document) throws Exception {
    Sequence<VocabWord> result = new Sequence<>();

    // Elements: fall back to tokenizing the raw content only when no
    // pre-built referenced content is available.
    List<VocabWord> referenced = document.getReferencedContent();
    if (referenced == null || referenced.isEmpty()) {
        if (tokenizerFactory == null) {
            instantiateTokenizerFactory();
        }

        List<String> rawTokens = tokenizerFactory.create(document.getContent()).getTokens();
        for (String rawToken : rawTokens) {
            if (rawToken != null && !rawToken.isEmpty()) {
                // NOTE(review): 1.0 is presumably the initial word frequency — confirm against VocabWord.
                result.addElement(new VocabWord(1.0, rawToken));
            }
        }
    } else {
        result.addElements(referenced);
    }

    // Labels: each usable label becomes a VocabWord flagged as a label.
    for (String labelText : document.getLabels()) {
        if (labelText != null && !labelText.isEmpty()) {
            VocabWord labelWord = new VocabWord(1.0, labelText);
            labelWord.markAsLabel(true);
            result.addSequenceLabel(labelWord);
        }
    }

    return result;
}
 
Example 2
Source File: ParagraphVectorsTest.java, from deeplearning4j (Apache License 2.0), 4 votes
/**
 * Verifies that both the label-aware document iterator and the line-based sentence
 * iterator yield the same counts on each of ten consecutive passes, with a
 * {@code reset()} between passes.
 *
 * <p>Per pass the labelled iterator is expected to produce 30 documents carrying 30
 * labels in total and no referenced content, and the sentence iterator is expected
 * to produce 97162 sentences.
 *
 * @throws IOException declared by the signature; the test performs file-system
 *                     setup before iterating
 */
@Test
public void testIterator() throws IOException {
    val labeledDir = testDir.newFolder();
    val unlabeledDir = testDir.newFolder();
    new ClassPathResource("/paravec/labeled/").copyDirectory(labeledDir);
    new ClassPathResource("/paravec/unlabeled/").copyDirectory(unlabeledDir);

    FileLabelAwareIterator documentIterator =
            new FileLabelAwareIterator.Builder().addSourceFolder(labeledDir).build();

    File sentencesFile = Resources.asFile("/big/raw_sentences.txt");
    SentenceIterator sentenceIterator = new BasicLineIterator(sentencesFile);

    for (int pass = 0; pass < 10; ++pass) {
        // First half of the pass: drain the labelled documents and tally what they carry.
        int documentCount = 0;
        int labelCount = 0;
        int wordCount = 0;
        while (documentIterator.hasNextDocument()) {
            ++documentCount;
            LabelledDocument current = documentIterator.nextDocument();
            labelCount += current.getLabels().size();
            List<VocabWord> referenced = current.getReferencedContent();
            if (!CollectionUtils.isEmpty(referenced)) {
                wordCount += referenced.size();
            }
        }
        documentIterator.reset();
        assertEquals(0, wordCount);
        assertEquals(30, labelCount);
        assertEquals(30, documentCount);

        // Second half of the pass: drain the sentence iterator and count lines.
        int sentenceCount = 0;
        while (sentenceIterator.hasNext()) {
            ++sentenceCount;
            sentenceIterator.nextSentence();
        }
        assertEquals(97162, sentenceCount);
        sentenceIterator.reset();
    }
}