Java Code Examples for org.datavec.api.conf.Configuration#setBoolean()

The following examples show how to use org.datavec.api.conf.Configuration#setBoolean(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SVMLightRecordWriterTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a three-column integer record (0, 1, 2) with the multilabel flag enabled and
 * columns 0-1 configured as features; the test passes only if a
 * {@link NumberFormatException} is raised.
 */
@Test(expected = NumberFormatException.class)
public void nonBinaryMultilabel() throws Exception {
    // The remaining column holds the value 2 -- presumably rejected as a non-binary label.
    List<Writable> row = Arrays.<Writable>asList(
            new IntWritable(0),
            new IntWritable(1),
            new IntWritable(2));

    File target = File.createTempFile("SVMLightRecordWriter", ".txt");
    target.setWritable(true);
    target.deleteOnExit();
    // Remove the placeholder created by createTempFile before handing the path to the writer.
    if (target.exists()) {
        target.delete();
    }

    try (SVMLightRecordWriter svmWriter = new SVMLightRecordWriter()) {
        Configuration writerConf = new Configuration();
        writerConf.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
        writerConf.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 1);
        writerConf.setBoolean(SVMLightRecordWriter.MULTILABEL, true);
        svmWriter.initialize(writerConf, new FileSplit(target), new NumberOfRecordsPartitioner());
        svmWriter.write(row);
    }
}
 
Example 2
Source File: SVMLightRecordWriterTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a multilabel writer configuration (feature columns 0-9) and a matching reader
 * configuration (10 features, 4 labels, one-based indexing), then passes both together
 * with {@code svmlight/multilabel.txt} to {@code executeTest}.
 */
@Test
public void testMultilabelRecord() throws Exception {
    // Reader settings.
    Configuration readerConf = new Configuration();
    readerConf.setBoolean(SVMLightRecordReader.ZERO_BASED_INDEXING, false);
    readerConf.setBoolean(SVMLightRecordReader.MULTILABEL, true);
    readerConf.setInt(SVMLightRecordReader.NUM_FEATURES, 10);
    readerConf.setInt(SVMLightRecordReader.NUM_LABELS, 4);

    // Writer settings.
    Configuration writerConf = new Configuration();
    writerConf.setBoolean(SVMLightRecordWriter.MULTILABEL, true);
    writerConf.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
    writerConf.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 9);

    File source = new ClassPathResource("svmlight/multilabel.txt").getFile();
    executeTest(writerConf, readerConf, source);
}
 
Example 3
Source File: SVMLightRecordWriterTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code executeTest} on {@code datavec-api/svmlight/multilabel.txt} with the writer's
 * ZERO_BASED_INDEXING flag enabled (feature columns 0-10, multilabel) and a reader
 * configured for 11 features and 5 labels.
 */
@Test
public void testZeroBasedIndexing() throws Exception {
    // Reader settings; ZERO_BASED_INDEXING is intentionally left unset here.
    Configuration readerConf = new Configuration();
    readerConf.setBoolean(SVMLightRecordReader.MULTILABEL, true);
    readerConf.setInt(SVMLightRecordReader.NUM_FEATURES, 11);
    readerConf.setInt(SVMLightRecordReader.NUM_LABELS, 5);

    // Writer settings.
    Configuration writerConf = new Configuration();
    writerConf.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
    writerConf.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 10);
    writerConf.setBoolean(SVMLightRecordWriter.MULTILABEL, true);
    writerConf.setBoolean(SVMLightRecordWriter.ZERO_BASED_INDEXING, true);

    File source = new ClassPathResource("datavec-api/svmlight/multilabel.txt").getFile();
    executeTest(writerConf, readerConf, source);
}
 
Example 4
Source File: LibSvmRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the first record of {@code svmlight/zeroIndexFeature.txt} with ZERO_BASED_INDEXING
 * switched off; the test passes only if an {@link IndexOutOfBoundsException} is raised.
 */
@Test(expected = IndexOutOfBoundsException.class)
public void testZeroIndexFeatureWithoutUsingZeroIndexing() throws Exception {
    Configuration readerConf = new Configuration();
    readerConf.setInt(LibSvmRecordReader.NUM_FEATURES, 10);
    readerConf.setBoolean(LibSvmRecordReader.APPEND_LABEL, true);
    readerConf.setBoolean(LibSvmRecordReader.ZERO_BASED_INDEXING, false);

    LibSvmRecordReader reader = new LibSvmRecordReader();
    FileSplit input = new FileSplit(new ClassPathResource("svmlight/zeroIndexFeature.txt").getFile());
    reader.initialize(readerConf, input);
    reader.next();
}
 
Example 5
Source File: LibSvmRecordWriterTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one record mixing scalar and NDArray writables through a multilabel
 * {@code LibSvmRecordWriter} (feature columns 0-3) and asserts that the trimmed file
 * content equals {@code "2,4 1:1.0 2:11.0 3:12.0 4:2.0 5:3.0"}.
 */
@Test
public void testNDArrayWritablesMultilabel() throws Exception {
    // Two-element array holding 11 and 12.
    INDArray pair = Nd4j.zeros(2);
    pair.putScalar(0, 11);
    pair.putScalar(1, 12);

    // Three-element array holding 0, 1, 0.
    INDArray triple = Nd4j.zeros(3);
    triple.putScalar(0, 0);
    triple.putScalar(1, 1);
    triple.putScalar(2, 0);

    List<Writable> row = Arrays.<Writable>asList(
            new DoubleWritable(1),
            new NDArrayWritable(pair),
            new IntWritable(2),
            new DoubleWritable(3),
            new NDArrayWritable(triple),
            new DoubleWritable(1));

    File target = File.createTempFile("LibSvmRecordWriter", ".txt");
    target.setWritable(true);
    target.deleteOnExit();
    // Remove the placeholder created by createTempFile before handing the path to the writer.
    if (target.exists()) {
        target.delete();
    }

    String expectedLine = "2,4 1:1.0 2:11.0 3:12.0 4:2.0 5:3.0";

    try (LibSvmRecordWriter libSvmWriter = new LibSvmRecordWriter()) {
        Configuration writerConf = new Configuration();
        writerConf.setBoolean(LibSvmRecordWriter.MULTILABEL, true);
        writerConf.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
        writerConf.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 3);
        libSvmWriter.initialize(writerConf, new FileSplit(target), new NumberOfRecordsPartitioner());
        libSvmWriter.write(row);
    }

    // The file is read only after the writer has been closed by try-with-resources.
    String writtenLine = FileUtils.readFileToString(target).trim();
    assertEquals(expectedLine, writtenLine);
}
 
Example 6
Source File: SVMLightRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the first record of {@code datavec-api/svmlight/zeroIndexFeature.txt} with
 * ZERO_BASED_INDEXING switched off; the test passes only if an
 * {@link IndexOutOfBoundsException} is raised.
 */
@Test(expected = IndexOutOfBoundsException.class)
public void testZeroIndexFeatureWithoutUsingZeroIndexing() throws Exception {
    Configuration readerConf = new Configuration();
    readerConf.setInt(SVMLightRecordReader.NUM_FEATURES, 10);
    readerConf.setBoolean(SVMLightRecordReader.ZERO_BASED_INDEXING, false);

    SVMLightRecordReader reader = new SVMLightRecordReader();
    FileSplit input = new FileSplit(new ClassPathResource("datavec-api/svmlight/zeroIndexFeature.txt").getFile());
    reader.initialize(readerConf, input);
    reader.next();
}
 
Example 7
Source File: LibSvmRecordWriterTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one record mixing scalar and NDArray writables through a multilabel
 * {@code LibSvmRecordWriter} with both zero-based indexing flags enabled (feature
 * columns 0-3) and asserts that the trimmed file content equals
 * {@code "1,3 0:1.0 1:11.0 2:12.0 3:2.0 4:3.0"}.
 */
@Test
public void testNDArrayWritablesZeroIndex() throws Exception {
    // Two-element array holding 11 and 12.
    INDArray pair = Nd4j.zeros(2);
    pair.putScalar(0, 11);
    pair.putScalar(1, 12);

    // Three-element array holding 0, 1, 0.
    INDArray triple = Nd4j.zeros(3);
    triple.putScalar(0, 0);
    triple.putScalar(1, 1);
    triple.putScalar(2, 0);

    List<Writable> row = Arrays.<Writable>asList(
            new DoubleWritable(1),
            new NDArrayWritable(pair),
            new IntWritable(2),
            new DoubleWritable(3),
            new NDArrayWritable(triple),
            new DoubleWritable(1));

    File target = File.createTempFile("LibSvmRecordWriter", ".txt");
    target.setWritable(true);
    target.deleteOnExit();
    // Remove the placeholder created by createTempFile before handing the path to the writer.
    if (target.exists()) {
        target.delete();
    }

    String expectedLine = "1,3 0:1.0 1:11.0 2:12.0 3:2.0 4:3.0";

    try (LibSvmRecordWriter libSvmWriter = new LibSvmRecordWriter()) {
        Configuration writerConf = new Configuration();
        writerConf.setBoolean(LibSvmRecordWriter.ZERO_BASED_INDEXING, true); // NOT STANDARD!
        writerConf.setBoolean(LibSvmRecordWriter.ZERO_BASED_LABEL_INDEXING, true); // NOT STANDARD!
        writerConf.setBoolean(LibSvmRecordWriter.MULTILABEL, true);
        writerConf.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
        writerConf.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 3);
        libSvmWriter.initialize(writerConf, new FileSplit(target), new NumberOfRecordsPartitioner());
        libSvmWriter.write(row);
    }

    // The file is read only after the writer has been closed by try-with-resources.
    String writtenLine = FileUtils.readFileToString(target).trim();
    assertEquals(expectedLine, writtenLine);
}
 
Example 8
Source File: LibSvmRecordWriterTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Passes a writer configuration (feature columns 0-9), a reader configuration
 * (10 features, one-based indexing) and {@code svmlight/multioutput.txt} to
 * {@code executeTest}.
 */
@Test
public void testMultioutputRecord() throws Exception {
    // Reader settings.
    Configuration readerConf = new Configuration();
    readerConf.setBoolean(LibSvmRecordReader.ZERO_BASED_INDEXING, false);
    readerConf.setInt(LibSvmRecordReader.NUM_FEATURES, 10);

    // Writer settings.
    Configuration writerConf = new Configuration();
    writerConf.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
    writerConf.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 9);

    File source = new ClassPathResource("svmlight/multioutput.txt").getFile();
    executeTest(writerConf, readerConf, source);
}
 
Example 9
Source File: SVMLightRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code svmlight/basic.txt} with APPEND_LABEL disabled and one-based feature
 * indexing, and checks every record against the expected 10-element feature list.
 * The source line each expectation was derived from is quoted in the comment above it.
 *
 * @throws IOException          declared by the reader calls
 * @throws InterruptedException declared by the reader calls
 */
@Test
public void testNoAppendLabel() throws IOException, InterruptedException {
    Map<Integer, List<Writable>> correct = new HashMap<>();
    // 7 2:1 4:2 6:3 8:4 10:5
    correct.put(0, Arrays.asList(ZERO, ONE,
                                ZERO, new DoubleWritable(2),
                                ZERO, new DoubleWritable(3),
                                ZERO, new DoubleWritable(4),
                                ZERO, new DoubleWritable(5)));
    // 2 qid:42 1:0.1 2:2 6:6.6 8:80
    correct.put(1, Arrays.asList(new DoubleWritable(0.1), new DoubleWritable(2),
                                ZERO, ZERO,
                                ZERO, new DoubleWritable(6.6),
                                ZERO, new DoubleWritable(80),
                                ZERO, ZERO));
    // 33
    correct.put(2, Arrays.asList(ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO));

    SVMLightRecordReader rr = new SVMLightRecordReader();
    Configuration config = new Configuration();
    config.setBoolean(SVMLightRecordReader.ZERO_BASED_INDEXING, false);
    config.setInt(SVMLightRecordReader.NUM_FEATURES, 10);
    config.setBoolean(SVMLightRecordReader.APPEND_LABEL, false);
    rr.initialize(config, new FileSplit(new ClassPathResource("svmlight/basic.txt").getFile()));

    int i = 0;
    while (rr.hasNext()) {
        List<Writable> record = rr.next();
        assertEquals(correct.get(i), record);
        i++;
    }
    // assertEquals takes (expected, actual): the fixture size is the expectation and the
    // number of records read is the observed value, so a failure message reads correctly.
    assertEquals(correct.size(), i);
}
 
Example 10
Source File: LibSvmRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Drains {@code datavec-api/svmlight/multilabel.txt} through a reader configured with
 * MULTILABEL disabled; the test passes only if an
 * {@link UnsupportedOperationException} is raised.
 */
@Test(expected = UnsupportedOperationException.class)
public void testInconsistentNumMultiabelsException() throws Exception {
    Configuration readerConf = new Configuration();
    readerConf.setBoolean(LibSvmRecordReader.ZERO_BASED_INDEXING, false);
    readerConf.setBoolean(LibSvmRecordReader.MULTILABEL, false);

    LibSvmRecordReader reader = new LibSvmRecordReader();
    FileSplit input = new FileSplit(new ClassPathResource("datavec-api/svmlight/multilabel.txt").getFile());
    reader.initialize(readerConf, input);

    // Consume every record; the results themselves are discarded.
    while (reader.hasNext()) {
        reader.next();
    }
}
 
Example 11
Source File: SVMLightRecordWriterTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Passes a writer configuration (feature columns 0-9), a reader configuration
 * (10 features, one-based indexing) and {@code datavec-api/svmlight/multioutput.txt}
 * to {@code executeTest}.
 */
@Test
public void testMultioutputRecord() throws Exception {
    // Reader settings.
    Configuration readerConf = new Configuration();
    readerConf.setBoolean(SVMLightRecordReader.ZERO_BASED_INDEXING, false);
    readerConf.setInt(SVMLightRecordReader.NUM_FEATURES, 10);

    // Writer settings.
    Configuration writerConf = new Configuration();
    writerConf.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
    writerConf.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 9);

    File source = new ClassPathResource("datavec-api/svmlight/multioutput.txt").getFile();
    executeTest(writerConf, readerConf, source);
}
 
Example 12
Source File: LibSvmRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the first record of {@code svmlight/zeroIndexLabel.txt} in multilabel mode
 * (10 features, 2 labels) without enabling any zero-based indexing flag; the test passes
 * only if an {@link IndexOutOfBoundsException} is raised.
 */
@Test(expected = IndexOutOfBoundsException.class)
public void testZeroIndexLabelWithoutUsingZeroIndexing() throws Exception {
    Configuration readerConf = new Configuration();
    readerConf.setInt(LibSvmRecordReader.NUM_FEATURES, 10);
    readerConf.setInt(LibSvmRecordReader.NUM_LABELS, 2);
    readerConf.setBoolean(LibSvmRecordReader.MULTILABEL, true);
    readerConf.setBoolean(LibSvmRecordReader.APPEND_LABEL, true);

    LibSvmRecordReader reader = new LibSvmRecordReader();
    FileSplit input = new FileSplit(new ClassPathResource("svmlight/zeroIndexLabel.txt").getFile());
    reader.initialize(readerConf, input);
    reader.next();
}
 
Example 13
Source File: LibSvmRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the first record of {@code datavec-api/svmlight/zeroIndexLabel.txt} in multilabel
 * mode (10 features, 2 labels) without enabling any zero-based indexing flag; the test
 * passes only if an {@link IndexOutOfBoundsException} is raised.
 */
@Test(expected = IndexOutOfBoundsException.class)
public void testZeroIndexLabelWithoutUsingZeroIndexing() throws Exception {
    Configuration readerConf = new Configuration();
    readerConf.setInt(LibSvmRecordReader.NUM_FEATURES, 10);
    readerConf.setInt(LibSvmRecordReader.NUM_LABELS, 2);
    readerConf.setBoolean(LibSvmRecordReader.MULTILABEL, true);
    readerConf.setBoolean(LibSvmRecordReader.APPEND_LABEL, true);

    LibSvmRecordReader reader = new LibSvmRecordReader();
    FileSplit input = new FileSplit(new ClassPathResource("datavec-api/svmlight/zeroIndexLabel.txt").getFile());
    reader.initialize(readerConf, input);
    reader.next();
}
 
Example 14
Source File: LibSvmRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code datavec-api/svmlight/basic.txt} with APPEND_LABEL disabled and one-based
 * feature indexing, and checks every record against the expected 10-element feature list.
 * The source line each expectation was derived from is quoted in the comment above it.
 *
 * @throws IOException          declared by the reader calls
 * @throws InterruptedException declared by the reader calls
 */
@Test
public void testNoAppendLabel() throws IOException, InterruptedException {
    Map<Integer, List<Writable>> correct = new HashMap<>();
    // 7 2:1 4:2 6:3 8:4 10:5
    correct.put(0, Arrays.asList(ZERO, ONE,
            ZERO, new DoubleWritable(2),
            ZERO, new DoubleWritable(3),
            ZERO, new DoubleWritable(4),
            ZERO, new DoubleWritable(5)));
    // 2 qid:42 1:0.1 2:2 6:6.6 8:80
    correct.put(1, Arrays.asList(new DoubleWritable(0.1), new DoubleWritable(2),
            ZERO, ZERO,
            ZERO, new DoubleWritable(6.6),
            ZERO, new DoubleWritable(80),
            ZERO, ZERO));
    // 33
    correct.put(2, Arrays.asList(ZERO, ZERO,
            ZERO, ZERO,
            ZERO, ZERO,
            ZERO, ZERO,
            ZERO, ZERO));

    // NOTE(review): this snippet is attributed to LibSvmRecordReaderTest yet instantiates
    // SVMLightRecordReader -- confirm whether LibSvmRecordReader was intended.
    SVMLightRecordReader rr = new SVMLightRecordReader();
    Configuration config = new Configuration();
    config.setBoolean(SVMLightRecordReader.ZERO_BASED_INDEXING, false);
    config.setInt(SVMLightRecordReader.NUM_FEATURES, 10);
    config.setBoolean(SVMLightRecordReader.APPEND_LABEL, false);
    rr.initialize(config, new FileSplit(new ClassPathResource("datavec-api/svmlight/basic.txt").getFile()));

    int i = 0;
    while (rr.hasNext()) {
        List<Writable> record = rr.next();
        assertEquals(correct.get(i), record);
        i++;
    }
    // assertEquals takes (expected, actual): the fixture size is the expectation and the
    // number of records read is the observed value, so a failure message reads correctly.
    assertEquals(correct.size(), i);
}
 
Example 15
Source File: SVMLightRecordReaderTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Reads {@code datavec-api/svmlight/noLabels.txt} with APPEND_LABEL enabled and one-based
 * feature indexing, and checks every record against the expected 10-element list.
 * The source line each expectation was derived from is quoted in the comment above it;
 * none of the expected lists contains a label entry.
 *
 * @throws IOException          declared by the reader calls
 * @throws InterruptedException declared by the reader calls
 */
@Test
public void testNoLabel() throws IOException, InterruptedException {
    Map<Integer, List<Writable>> correct = new HashMap<>();
    //  2:1 4:2 6:3 8:4 10:5
    correct.put(0, Arrays.asList(ZERO, ONE,
            ZERO, new DoubleWritable(2),
            ZERO, new DoubleWritable(3),
            ZERO, new DoubleWritable(4),
            ZERO, new DoubleWritable(5)));
    //  qid:42 1:0.1 2:2 6:6.6 8:80
    correct.put(1, Arrays.asList(new DoubleWritable(0.1), new DoubleWritable(2),
            ZERO, ZERO,
            ZERO, new DoubleWritable(6.6),
            ZERO, new DoubleWritable(80),
            ZERO, ZERO));
    //  1:1.0
    correct.put(2, Arrays.asList(new DoubleWritable(1.0), ZERO,
            ZERO, ZERO,
            ZERO, ZERO,
            ZERO, ZERO,
            ZERO, ZERO));
    // (empty line)
    correct.put(3, Arrays.asList(ZERO, ZERO,
            ZERO, ZERO,
            ZERO, ZERO,
            ZERO, ZERO,
            ZERO, ZERO));

    SVMLightRecordReader rr = new SVMLightRecordReader();
    Configuration config = new Configuration();
    config.setBoolean(SVMLightRecordReader.ZERO_BASED_INDEXING, false);
    config.setInt(SVMLightRecordReader.NUM_FEATURES, 10);
    config.setBoolean(SVMLightRecordReader.APPEND_LABEL, true);
    rr.initialize(config, new FileSplit(new ClassPathResource("datavec-api/svmlight/noLabels.txt").getFile()));

    int i = 0;
    while (rr.hasNext()) {
        List<Writable> record = rr.next();
        assertEquals(correct.get(i), record);
        i++;
    }
    // assertEquals takes (expected, actual): the fixture size is the expectation and the
    // number of records read is the observed value, so a failure message reads correctly.
    assertEquals(correct.size(), i);
}
 
Example 16
Source File: LibSvmRecordReaderTest.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Reads {@code svmlight/multilabel.txt} in multilabel mode (10 features, 4 labels,
 * one-based feature indexing, labels appended) and checks every record against the
 * expected list of 10 feature entries followed by 4 label entries.
 * The source line each expectation was derived from is quoted in the comment above it.
 *
 * @throws IOException          declared by the reader calls
 * @throws InterruptedException declared by the reader calls
 */
@Test
public void testMultilabelRecord() throws IOException, InterruptedException {
    Map<Integer, List<Writable>> correct = new HashMap<>();
    // 1,3 2:1 4:2 6:3 8:4 10:5
    correct.put(0, Arrays.asList(ZERO, ONE,
                                ZERO, new DoubleWritable(2),
                                ZERO, new DoubleWritable(3),
                                ZERO, new DoubleWritable(4),
                                ZERO, new DoubleWritable(5),
                                LABEL_ONE, LABEL_ZERO,
                                LABEL_ONE, LABEL_ZERO));
    // 2 qid:42 1:0.1 2:2 6:6.6 8:80
    correct.put(1, Arrays.asList(new DoubleWritable(0.1), new DoubleWritable(2),
                                ZERO, ZERO,
                                ZERO, new DoubleWritable(6.6),
                                ZERO, new DoubleWritable(80),
                                ZERO, ZERO,
                                LABEL_ZERO, LABEL_ONE,
                                LABEL_ZERO, LABEL_ZERO));
    // 1,2,4
    correct.put(2, Arrays.asList(ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                LABEL_ONE, LABEL_ONE,
                                LABEL_ZERO, LABEL_ONE));
    //  1:1.0
    correct.put(3, Arrays.asList(new DoubleWritable(1.0), ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                LABEL_ZERO, LABEL_ZERO,
                                LABEL_ZERO, LABEL_ZERO));
    // (empty line)
    correct.put(4, Arrays.asList(ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                LABEL_ZERO, LABEL_ZERO,
                                LABEL_ZERO, LABEL_ZERO));

    LibSvmRecordReader rr = new LibSvmRecordReader();
    Configuration config = new Configuration();
    config.setBoolean(LibSvmRecordReader.ZERO_BASED_INDEXING, false);
    config.setBoolean(LibSvmRecordReader.APPEND_LABEL, true);
    config.setInt(LibSvmRecordReader.NUM_FEATURES, 10);
    config.setBoolean(LibSvmRecordReader.MULTILABEL, true);
    config.setInt(LibSvmRecordReader.NUM_LABELS, 4);
    rr.initialize(config, new FileSplit(new ClassPathResource("svmlight/multilabel.txt").getFile()));

    int i = 0;
    while (rr.hasNext()) {
        List<Writable> record = rr.next();
        assertEquals(correct.get(i), record);
        i++;
    }
    // assertEquals takes (expected, actual): the fixture size is the expectation and the
    // number of records read is the observed value, so a failure message reads correctly.
    assertEquals(correct.size(), i);
}
 
Example 17
Source File: LibSvmRecordReaderTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Reads {@code datavec-api/svmlight/multilabel.txt} in multilabel mode with
 * ZERO_BASED_LABEL_INDEXING enabled and ZERO_BASED_INDEXING left at its default
 * (11 features, 5 labels, labels appended), and checks every record against the expected
 * list of 11 feature entries followed by 5 label entries.
 * The source line each expectation was derived from is quoted in the comment above it.
 *
 * @throws IOException          declared by the reader calls
 * @throws InterruptedException declared by the reader calls
 */
@Test
public void testZeroBasedIndexing() throws IOException, InterruptedException {
    Map<Integer, List<Writable>> correct = new HashMap<>();
    // 1,3 2:1 4:2 6:3 8:4 10:5
    correct.put(0, Arrays.asList(ZERO,
                                ZERO, ONE,
                                ZERO, new DoubleWritable(2),
                                ZERO, new DoubleWritable(3),
                                ZERO, new DoubleWritable(4),
                                ZERO, new DoubleWritable(5),
                                LABEL_ZERO,
                                LABEL_ONE, LABEL_ZERO,
                                LABEL_ONE, LABEL_ZERO));
    // 2 qid:42 1:0.1 2:2 6:6.6 8:80
    correct.put(1, Arrays.asList(ZERO,
                                new DoubleWritable(0.1), new DoubleWritable(2),
                                ZERO, ZERO,
                                ZERO, new DoubleWritable(6.6),
                                ZERO, new DoubleWritable(80),
                                ZERO, ZERO,
                                LABEL_ZERO,
                                LABEL_ZERO, LABEL_ONE,
                                LABEL_ZERO, LABEL_ZERO));
    // 1,2,4
    correct.put(2, Arrays.asList(ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                LABEL_ZERO,
                                LABEL_ONE, LABEL_ONE,
                                LABEL_ZERO, LABEL_ONE));
    //  1:1.0
    correct.put(3, Arrays.asList(ZERO,
                                new DoubleWritable(1.0), ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                LABEL_ZERO,
                                LABEL_ZERO, LABEL_ZERO,
                                LABEL_ZERO, LABEL_ZERO));
    // (empty line)
    correct.put(4, Arrays.asList(ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                LABEL_ZERO,
                                LABEL_ZERO, LABEL_ZERO,
                                LABEL_ZERO, LABEL_ZERO));

    LibSvmRecordReader rr = new LibSvmRecordReader();
    Configuration config = new Configuration();
    // Zero-based indexing is default
    // NOTE(review): the key below is taken from SVMLightRecordReader while every other key
    // in this test comes from LibSvmRecordReader -- confirm both resolve to the same constant.
    config.setBoolean(SVMLightRecordReader.ZERO_BASED_LABEL_INDEXING, true); // NOT STANDARD!
    config.setBoolean(LibSvmRecordReader.APPEND_LABEL, true);
    config.setInt(LibSvmRecordReader.NUM_FEATURES, 11);
    config.setBoolean(LibSvmRecordReader.MULTILABEL, true);
    config.setInt(LibSvmRecordReader.NUM_LABELS, 5);
    rr.initialize(config, new FileSplit(new ClassPathResource("datavec-api/svmlight/multilabel.txt").getFile()));

    int i = 0;
    while (rr.hasNext()) {
        List<Writable> record = rr.next();
        assertEquals(correct.get(i), record);
        i++;
    }
    // assertEquals takes (expected, actual): the fixture size is the expectation and the
    // number of records read is the observed value, so a failure message reads correctly.
    assertEquals(correct.size(), i);
}
 
Example 18
Source File: SVMLightRecordReaderTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Reads {@code datavec-api/svmlight/multilabel.txt} in multilabel mode with
 * ZERO_BASED_LABEL_INDEXING enabled and ZERO_BASED_INDEXING left at its default
 * (11 features, 5 labels), and checks every record against the expected list of
 * 11 feature entries followed by 5 label entries.
 * The source line each expectation was derived from is quoted in the comment above it.
 *
 * @throws IOException          declared by the reader calls
 * @throws InterruptedException declared by the reader calls
 */
@Test
public void testZeroBasedIndexing() throws IOException, InterruptedException {
    Map<Integer, List<Writable>> correct = new HashMap<>();
    // 1,3 2:1 4:2 6:3 8:4 10:5
    correct.put(0, Arrays.asList(ZERO,
                                ZERO, ONE,
                                ZERO, new DoubleWritable(2),
                                ZERO, new DoubleWritable(3),
                                ZERO, new DoubleWritable(4),
                                ZERO, new DoubleWritable(5),
                                LABEL_ZERO,
                                LABEL_ONE, LABEL_ZERO,
                                LABEL_ONE, LABEL_ZERO));
    // 2 qid:42 1:0.1 2:2 6:6.6 8:80
    correct.put(1, Arrays.asList(ZERO,
                                new DoubleWritable(0.1), new DoubleWritable(2),
                                ZERO, ZERO,
                                ZERO, new DoubleWritable(6.6),
                                ZERO, new DoubleWritable(80),
                                ZERO, ZERO,
                                LABEL_ZERO,
                                LABEL_ZERO, LABEL_ONE,
                                LABEL_ZERO, LABEL_ZERO));
    // 1,2,4
    correct.put(2, Arrays.asList(ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                LABEL_ZERO,
                                LABEL_ONE, LABEL_ONE,
                                LABEL_ZERO, LABEL_ONE));
    //  1:1.0
    correct.put(3, Arrays.asList(ZERO,
                                new DoubleWritable(1.0), ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                LABEL_ZERO,
                                LABEL_ZERO, LABEL_ZERO,
                                LABEL_ZERO, LABEL_ZERO));
    // (empty line)
    correct.put(4, Arrays.asList(ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                LABEL_ZERO,
                                LABEL_ZERO, LABEL_ZERO,
                                LABEL_ZERO, LABEL_ZERO));

    SVMLightRecordReader rr = new SVMLightRecordReader();
    Configuration config = new Configuration();
    // Zero-based indexing is default
    config.setBoolean(SVMLightRecordReader.ZERO_BASED_LABEL_INDEXING, true); // NOT STANDARD!
    config.setInt(SVMLightRecordReader.NUM_FEATURES, 11);
    config.setBoolean(SVMLightRecordReader.MULTILABEL, true);
    config.setInt(SVMLightRecordReader.NUM_LABELS, 5);
    rr.initialize(config, new FileSplit(new ClassPathResource("datavec-api/svmlight/multilabel.txt").getFile()));

    int i = 0;
    while (rr.hasNext()) {
        List<Writable> record = rr.next();
        assertEquals(correct.get(i), record);
        i++;
    }
    // assertEquals takes (expected, actual): the fixture size is the expectation and the
    // number of records read is the observed value, so a failure message reads correctly.
    assertEquals(correct.size(), i);
}
 
Example 19
Source File: SVMLightRecordReaderTest.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Reads {@code svmlight/multioutput.txt} with one-based feature indexing and 10 features,
 * and checks every record against the expected list of 10 feature entries followed by
 * three output values.
 * The source line each expectation was derived from is quoted in the comment above it.
 *
 * @throws IOException          declared by the reader calls
 * @throws InterruptedException declared by the reader calls
 */
@Test
public void testMultioutputRecord() throws IOException, InterruptedException {
    Map<Integer, List<Writable>> correct = new HashMap<>();
    // 7 2.45,9 2:1 4:2 6:3 8:4 10:5
    correct.put(0, Arrays.asList(ZERO, ONE,
                                ZERO, new DoubleWritable(2),
                                ZERO, new DoubleWritable(3),
                                ZERO, new DoubleWritable(4),
                                ZERO, new DoubleWritable(5),
                                new IntWritable(7), new DoubleWritable(2.45),
                                new IntWritable(9)));
    // 2,3,4 qid:42 1:0.1 2:2 6:6.6 8:80
    correct.put(1, Arrays.asList(new DoubleWritable(0.1), new DoubleWritable(2),
                                ZERO, ZERO,
                                ZERO, new DoubleWritable(6.6),
                                ZERO, new DoubleWritable(80),
                                ZERO, ZERO,
                                new IntWritable(2), new IntWritable(3),
                                new IntWritable(4)));
    // 33,32.0,31.9
    correct.put(2, Arrays.asList(ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                ZERO, ZERO,
                                new IntWritable(33), new DoubleWritable(32.0),
                                new DoubleWritable(31.9)));

    SVMLightRecordReader rr = new SVMLightRecordReader();
    Configuration config = new Configuration();
    config.setBoolean(SVMLightRecordReader.ZERO_BASED_INDEXING, false);
    config.setInt(SVMLightRecordReader.NUM_FEATURES, 10);
    rr.initialize(config, new FileSplit(new ClassPathResource("svmlight/multioutput.txt").getFile()));

    int i = 0;
    while (rr.hasNext()) {
        List<Writable> record = rr.next();
        assertEquals(correct.get(i), record);
        i++;
    }
    // assertEquals takes (expected, actual): the fixture size is the expectation and the
    // number of records read is the observed value, so a failure message reads correctly.
    assertEquals(correct.size(), i);
}
 
Example 20
Source File: DataIteratorConstructor.java    From scava with Eclipse Public License 2.0 4 votes vote down vote up
/**
 * Assembles {@code dataIterator} from the given data set.
 * <p>
 * A text reader is always registered under the name {@code "textReader"}; when the data
 * set exposes an extra file path, a second reader is registered as {@code "extraReader"}.
 * Along the way the fields {@code multilabel}, {@code textFeaturesSize}, {@code labels},
 * {@code vectorizer}, {@code storeVectorizer} and (for the extra file)
 * {@code numericFeaturesSize} are updated.
 *
 * @param configuration reader configuration; it is mutated here (LABELLED, MULTILABEL and,
 *                      when an extra file exists, FEATURES are written into it)
 * @param fileDataSet   source of file paths and labelling flags
 * @param vectorizer    optional vectorizer handed to the text reader; ignored when {@code null}
 * @throws IOException          declared by the reader initialisation calls
 * @throws InterruptedException declared by the reader initialisation calls
 */
private void buildDataIterator(Configuration configuration, FileDataSet fileDataSet, VasttextTextVectorizer vectorizer) throws IOException, InterruptedException
{
	// Batch size comes from the configuration, falling back to 32.
	Builder iteratorBuilder = new VasttextDataIterator.Builder(configuration.getInt(batchSize, 32));

	configuration.setBoolean(VasttextTextFileReader.LABELLED, fileDataSet.isLabelled());
	configuration.setBoolean(VasttextTextFileReader.MULTILABEL, fileDataSet.isMultiLabel());

	VasttextTextFileReader textReader = new VasttextTextFileReader();
	if (vectorizer != null)
	{
		textReader.setVastTextTextVectorizer(vectorizer);
	}
	textReader.initialize(configuration, new FileSplit(fileDataSet.getTextFilePath().toFile()));

	multilabel = fileDataSet.isMultiLabel();
	textFeaturesSize = textReader.getTextFeaturesSize();
	labels = textReader.getLabels();

	// The vectorizer is retained only when labels exist and storing was requested.
	if (labels == null || !storeVectorizer)
	{
		storeVectorizer = false;
		this.vectorizer = null;
	}
	else
	{
		this.vectorizer = textReader.getVasttextTextVectorizer();
	}

	iteratorBuilder.addReader("textReader", textReader);
	if (labels != null)
	{
		iteratorBuilder.addOutput("textReader", labels.size());
	}
	iteratorBuilder.addInput("textReader");

	if (fileDataSet.getExtraFilePath() != null)
	{
		configuration.setInt(VasttextExtraFileReader.FEATURES, fileDataSet.numericFeaturesSize());

		// The extra reader receives the lines-deleted information reported by the text reader.
		VasttextExtraFileReader extraReader = new VasttextExtraFileReader(textReader.getLinesDeleted());
		extraReader.initialize(configuration, new FileSplit(fileDataSet.getExtraFilePath().toFile()));

		numericFeaturesSize = fileDataSet.numericFeaturesSize();

		iteratorBuilder.addReader("extraReader", extraReader);
		iteratorBuilder.addInput("extraReader");
	}

	dataIterator = iteratorBuilder.build();
}