org.datavec.api.split.partition.NumberOfRecordsPartitioner Java Examples

The following examples show how to use org.datavec.api.split.partition.NumberOfRecordsPartitioner. They are drawn from the test suites of the open-source DataVec, deeplearning4j, and konduit-serving projects; the source file and license for each example are noted above it.
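Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: a NumberOfRecordsPartitioner is handed to a RecordWriter at initialization time together with a FileSplit pointing at the output location. The class and method names below are taken from the examples on this page, but the import packages and the wrapper class are assumptions for illustration, not canonical usage.

import java.io.File;
import java.util.Arrays;
import java.util.List;

import org.datavec.api.records.writer.impl.csv.CSVRecordWriter;
import org.datavec.api.split.FileSplit;
import org.datavec.api.split.partition.NumberOfRecordsPartitioner;
import org.datavec.api.writable.Text;
import org.datavec.api.writable.Writable;

public class NumberOfRecordsPartitionerSketch {
    public static void main(String[] args) throws Exception {
        // Output location for the writer; a FileSplit wraps the target file.
        File outputFile = File.createTempFile("partitioner-sketch", ".csv");
        outputFile.deleteOnExit();

        CSVRecordWriter writer = new CSVRecordWriter();
        // The partitioner decides when a new output file (partition) is needed.
        // With no configuration it keeps everything in a single partition,
        // as the PartitionerTests examples below demonstrate.
        writer.initialize(new FileSplit(outputFile), new NumberOfRecordsPartitioner());

        // One record is just a list of Writable values.
        List<Writable> record = Arrays.asList((Writable) new Text("12"), new Text("13"), new Text("14"));
        writer.write(record);
        writer.close();
    }
}

To cap the number of records written per file, set NumberOfRecordsPartitioner.RECORDS_PER_FILE_CONFIG on a Configuration and pass the configuration to the partitioner's init method, as shown in the PartitionerTests examples below.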
Example #1
Source File: SVMLightRecordWriterTest.java    From DataVec with Apache License 2.0
@Test
public void testNonIntegerButValidMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
            new IntWritable(2),
            new DoubleWritable(1.0));
    File tempFile = File.createTempFile("SVMLightRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (SVMLightRecordWriter writer = new SVMLightRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 1);
        configWriter.setBoolean(SVMLightRecordWriter.MULTILABEL, true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #2
Source File: SVMLightRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test(expected = NumberFormatException.class)
public void nonBinaryMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(0),
            new IntWritable(1),
            new IntWritable(2));
    File tempFile = File.createTempFile("SVMLightRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (SVMLightRecordWriter writer = new SVMLightRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 1);
        configWriter.setBoolean(SVMLightRecordWriter.MULTILABEL, true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #3
Source File: LibSvmRecordWriterTest.java    From DataVec with Apache License 2.0
@Test(expected = NumberFormatException.class)
public void nonBinaryMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(0),
            new IntWritable(1),
            new IntWritable(2));
    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN,0);
        configWriter.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN,1);
        configWriter.setBoolean(LibSvmRecordWriter.MULTILABEL,true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #4
Source File: LibSvmRecordWriterTest.java    From DataVec with Apache License 2.0
@Test(expected = NumberFormatException.class)
public void nonIntegerMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
                                            new IntWritable(2),
                                            new DoubleWritable(1.2));
    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 1);
        configWriter.setBoolean(LibSvmRecordWriter.MULTILABEL, true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #5
Source File: LibSvmRecordWriterTest.java    From DataVec with Apache License 2.0
@Test
public void testNonIntegerButValidMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
            new IntWritable(2),
            new DoubleWritable(1.0));
    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 1);
        configWriter.setBoolean(LibSvmRecordWriter.MULTILABEL, true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #6
Source File: SVMLightRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test(expected = NumberFormatException.class)
public void nonIntegerMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
                                            new IntWritable(2),
                                            new DoubleWritable(1.2));
    File tempFile = File.createTempFile("SVMLightRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (SVMLightRecordWriter writer = new SVMLightRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 1);
        configWriter.setBoolean(SVMLightRecordWriter.MULTILABEL, true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #7
Source File: PartitionerTests.java    From DataVec with Apache License 2.0
@Test
public void testInputAddFile() throws Exception {
    Partitioner partitioner = new NumberOfRecordsPartitioner();
    File tmpDir = Files.createTempDir();
    FileSplit fileSplit = new FileSplit(tmpDir);
    assertTrue(fileSplit.needsBootstrapForWrite());
    fileSplit.bootStrapForWrite();
    Configuration configuration = new Configuration();
    configuration.set(NumberOfRecordsPartitioner.RECORDS_PER_FILE_CONFIG,String.valueOf(5));
    partitioner.init(configuration,fileSplit);
    partitioner.updatePartitionInfo(PartitionMetaData.builder().numRecordsUpdated(5).build());
    assertTrue(partitioner.needsNewPartition());
    OutputStream os = partitioner.openNewStream();
    os.close();
    assertNotNull(os);
    //run more than once to ensure output stream creation works properly
    partitioner.updatePartitionInfo(PartitionMetaData.builder().numRecordsUpdated(5).build());
    os = partitioner.openNewStream();
    os.close();
    assertNotNull(os);


}
 
Example #8
Source File: ExcelRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testWriter() throws Exception  {
    ExcelRecordWriter excelRecordWriter = new ExcelRecordWriter();
    val records = records();
    File tmpDir = testDir.newFolder();
    File outputFile = new File(tmpDir,"testexcel.xlsx");
    outputFile.deleteOnExit();
    FileSplit fileSplit = new FileSplit(outputFile);
    excelRecordWriter.initialize(fileSplit,new NumberOfRecordsPartitioner());
    excelRecordWriter.writeBatch(records.getRight());
    excelRecordWriter.close();
    File parentFile = outputFile.getParentFile();
    assertEquals(1,parentFile.list().length);

    ExcelRecordReader excelRecordReader = new ExcelRecordReader();
    excelRecordReader.initialize(fileSplit);
    List<List<Writable>> next = excelRecordReader.next(10);
    assertEquals(10,next.size());

}
 
Example #9
Source File: LibSvmRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testNonIntegerButValidMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
            new IntWritable(2),
            new DoubleWritable(1.0));
    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 1);
        configWriter.setBoolean(LibSvmRecordWriter.MULTILABEL, true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #10
Source File: SVMLightRecordWriterTest.java    From DataVec with Apache License 2.0
@Test(expected = NumberFormatException.class)
public void nonBinaryMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(0),
            new IntWritable(1),
            new IntWritable(2));
    File tempFile = File.createTempFile("SVMLightRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (SVMLightRecordWriter writer = new SVMLightRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 1);
        configWriter.setBoolean(SVMLightRecordWriter.MULTILABEL, true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #11
Source File: SVMLightRecordWriterTest.java    From DataVec with Apache License 2.0
@Test(expected = NumberFormatException.class)
public void nonIntegerMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
                                            new IntWritable(2),
                                            new DoubleWritable(1.2));
    File tempFile = File.createTempFile("SVMLightRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (SVMLightRecordWriter writer = new SVMLightRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 1);
        configWriter.setBoolean(SVMLightRecordWriter.MULTILABEL, true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #12
Source File: LibSvmRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test(expected = NumberFormatException.class)
public void nonIntegerMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
                                            new IntWritable(2),
                                            new DoubleWritable(1.2));
    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 1);
        configWriter.setBoolean(LibSvmRecordWriter.MULTILABEL, true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #13
Source File: LibSvmRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test(expected = NumberFormatException.class)
public void nonBinaryMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(0),
            new IntWritable(1),
            new IntWritable(2));
    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN,0);
        configWriter.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN,1);
        configWriter.setBoolean(LibSvmRecordWriter.MULTILABEL,true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #14
Source File: SVMLightRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testNonIntegerButValidMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
            new IntWritable(2),
            new DoubleWritable(1.0));
    File tempFile = File.createTempFile("SVMLightRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (SVMLightRecordWriter writer = new SVMLightRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 1);
        configWriter.setBoolean(SVMLightRecordWriter.MULTILABEL, true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #15
Source File: ExcelRecordWriterTest.java    From DataVec with Apache License 2.0
@Test
public void testWriter() throws Exception  {
    ExcelRecordWriter excelRecordWriter = new ExcelRecordWriter();
    val records = records();
    File tmpDir = Files.createTempDirectory("testexcel").toFile();
    File outputFile = new File(tmpDir,"testexcel.xlsx");
    outputFile.deleteOnExit();
    FileSplit fileSplit = new FileSplit(outputFile);
    excelRecordWriter.initialize(fileSplit,new NumberOfRecordsPartitioner());
    excelRecordWriter.writeBatch(records.getRight());
    excelRecordWriter.close();
    File parentFile = outputFile.getParentFile();
    assertEquals(1,parentFile.list().length);

    ExcelRecordReader excelRecordReader = new ExcelRecordReader();
    excelRecordReader.initialize(fileSplit);
    List<List<Writable>> next = excelRecordReader.next(10);
    assertEquals(10,next.size());

}
 
Example #16
Source File: BatchInputParserMultiRecordTest.java    From konduit-serving with Apache License 2.0
@Test(timeout = 60000)
public void runAdd(TestContext testContext) throws Exception {
    BatchInputArrowParserVerticle verticleRef = (BatchInputArrowParserVerticle) verticle;
    Schema irisInputSchema = TrainUtils.getIrisInputSchema();
    ArrowRecordWriter arrowRecordWriter = new ArrowRecordWriter(irisInputSchema);
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new FileSplit(new ClassPathResource("iris.txt").getFile()));
    List<List<Writable>> writables = reader.next(150);

    File tmpFile = new File(temporary.getRoot(), "tmp.arrow");
    FileSplit fileSplit = new FileSplit(tmpFile);
    arrowRecordWriter.initialize(fileSplit, new NumberOfRecordsPartitioner());
    arrowRecordWriter.writeBatch(writables);

    given().port(port)
            .multiPart("input1", tmpFile)
            .when().post("/")
            .then().statusCode(200);

    testContext.assertNotNull(verticleRef.getBatch(), "Inputs were null. This means parsing failed.");
    testContext.assertTrue(verticleRef.getBatch().length >= 1);
    testContext.assertNotNull(verticleRef.getBatch());
    testContext.assertEquals(150, verticleRef.getBatch().length);
}
 
Example #17
Source File: ArrowBinaryInputAdapterTest.java    From konduit-serving with Apache License 2.0
@Test(timeout = 60000)
public void testArrowBinary() throws Exception {
    Schema irisInputSchema = TrainUtils.getIrisInputSchema();
    ArrowRecordWriter arrowRecordWriter = new ArrowRecordWriter(irisInputSchema);
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new FileSplit(new ClassPathResource("iris.txt").getFile()));
    List<List<Writable>> writables = reader.next(150);

    File tmpFile = new File(temporary.getRoot(), "tmp.arrow");
    FileSplit fileSplit = new FileSplit(tmpFile);
    arrowRecordWriter.initialize(fileSplit, new NumberOfRecordsPartitioner());
    arrowRecordWriter.writeBatch(writables);
    byte[] arrowBytes = FileUtils.readFileToByteArray(tmpFile);

    Buffer buffer = Buffer.buffer(arrowBytes);
    ArrowBinaryInputAdapter arrowBinaryInputAdapter = new ArrowBinaryInputAdapter();
    ArrowWritableRecordBatch convert = arrowBinaryInputAdapter.convert(buffer, ConverterArgs.builder().schema(irisInputSchema).build(), null);
    assertEquals(writables.size(), convert.size());
}
 
Example #18
Source File: PartitionerTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testInputAddFile() throws Exception {
    Partitioner partitioner = new NumberOfRecordsPartitioner();
    File tmpDir = Files.createTempDir();
    FileSplit fileSplit = new FileSplit(tmpDir);
    assertTrue(fileSplit.needsBootstrapForWrite());
    fileSplit.bootStrapForWrite();
    Configuration configuration = new Configuration();
    configuration.set(NumberOfRecordsPartitioner.RECORDS_PER_FILE_CONFIG,String.valueOf(5));
    partitioner.init(configuration,fileSplit);
    partitioner.updatePartitionInfo(PartitionMetaData.builder().numRecordsUpdated(5).build());
    assertTrue(partitioner.needsNewPartition());
    OutputStream os = partitioner.openNewStream();
    os.close();
    assertNotNull(os);
    //run more than once to ensure output stream creation works properly
    partitioner.updatePartitionInfo(PartitionMetaData.builder().numRecordsUpdated(5).build());
    os = partitioner.openNewStream();
    os.close();
    assertNotNull(os);


}
 
Example #19
Source File: SVMLightRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testNDArrayWritablesMultilabel() throws Exception {
    INDArray arr2 = Nd4j.zeros(2);
    arr2.putScalar(0, 11);
    arr2.putScalar(1, 12);
    INDArray arr3 = Nd4j.zeros(3);
    arr3.putScalar(0, 0);
    arr3.putScalar(1, 1);
    arr3.putScalar(2, 0);
    List<Writable> record = Arrays.asList((Writable) new DoubleWritable(1),
            new NDArrayWritable(arr2),
            new IntWritable(2),
            new DoubleWritable(3),
            new NDArrayWritable(arr3),
            new DoubleWritable(1));
    File tempFile = File.createTempFile("SVMLightRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    String lineOriginal = "2,4 1:1.0 2:11.0 3:12.0 4:2.0 5:3.0";

    try (SVMLightRecordWriter writer = new SVMLightRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setBoolean(SVMLightRecordWriter.MULTILABEL, true);
        configWriter.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 3);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }

    String lineNew = FileUtils.readFileToString(tempFile).trim();
    assertEquals(lineOriginal, lineNew);
}
 
Example #20
Source File: SVMLightRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testNDArrayWritablesZeroIndex() throws Exception {
    INDArray arr2 = Nd4j.zeros(2);
    arr2.putScalar(0, 11);
    arr2.putScalar(1, 12);
    INDArray arr3 = Nd4j.zeros(3);
    arr3.putScalar(0, 0);
    arr3.putScalar(1, 1);
    arr3.putScalar(2, 0);
    List<Writable> record = Arrays.asList((Writable) new DoubleWritable(1),
            new NDArrayWritable(arr2),
            new IntWritable(2),
            new DoubleWritable(3),
            new NDArrayWritable(arr3),
            new DoubleWritable(1));
    File tempFile = File.createTempFile("SVMLightRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    String lineOriginal = "1,3 0:1.0 1:11.0 2:12.0 3:2.0 4:3.0";

    try (SVMLightRecordWriter writer = new SVMLightRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setBoolean(SVMLightRecordWriter.ZERO_BASED_INDEXING, true); // NOT STANDARD!
        configWriter.setBoolean(SVMLightRecordWriter.ZERO_BASED_LABEL_INDEXING, true); // NOT STANDARD!
        configWriter.setBoolean(SVMLightRecordWriter.MULTILABEL, true);
        configWriter.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 3);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }

    String lineNew = FileUtils.readFileToString(tempFile).trim();
    assertEquals(lineOriginal, lineNew);
}
 
Example #21
Source File: PartitionerTests.java    From DataVec with Apache License 2.0
@Test
public void testRecordsPerFilePartition() {
    Partitioner partitioner = new NumberOfRecordsPartitioner();
    File tmpDir = Files.createTempDir();
    FileSplit fileSplit = new FileSplit(tmpDir);
    assertTrue(fileSplit.needsBootstrapForWrite());
    fileSplit.bootStrapForWrite();
    partitioner.init(fileSplit);
    assertEquals(1,partitioner.numPartitions());
}
 
Example #22
Source File: LibSvmRecordWriterTest.java    From deeplearning4j with Apache License 2.0
public static void executeTest(Configuration configWriter, Configuration configReader, File inputFile) throws Exception {
    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        LibSvmRecordReader rr = new LibSvmRecordReader();
        rr.initialize(configReader, new FileSplit(inputFile));
        while (rr.hasNext()) {
            List<Writable> record = rr.next();
            writer.write(record);
        }
    }

    Pattern p = Pattern.compile(String.format("%s:\\d+ ", LibSvmRecordReader.QID_PREFIX));
    List<String> linesOriginal = new ArrayList<>();
    for (String line : FileUtils.readLines(inputFile)) {
        if (!line.startsWith(LibSvmRecordReader.COMMENT_CHAR)) {
            String lineClean = line.split(LibSvmRecordReader.COMMENT_CHAR, 2)[0];
            if (lineClean.startsWith(" ")) {
                lineClean = " " + lineClean.trim();
            } else {
                lineClean = lineClean.trim();
            }
            Matcher m = p.matcher(lineClean);
            lineClean = m.replaceAll("");
            linesOriginal.add(lineClean);
        }
    }
    List<String> linesNew = FileUtils.readLines(tempFile);
    assertEquals(linesOriginal, linesNew);
}
 
Example #23
Source File: LibSvmRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testNDArrayWritables() throws Exception {
    INDArray arr2 = Nd4j.zeros(2);
    arr2.putScalar(0, 11);
    arr2.putScalar(1, 12);
    INDArray arr3 = Nd4j.zeros(3);
    arr3.putScalar(0, 13);
    arr3.putScalar(1, 14);
    arr3.putScalar(2, 15);
    List<Writable> record = Arrays.asList((Writable) new DoubleWritable(1),
                                        new NDArrayWritable(arr2),
                                        new IntWritable(2),
                                        new DoubleWritable(3),
                                        new NDArrayWritable(arr3),
                                        new IntWritable(4));
    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    String lineOriginal = "13.0,14.0,15.0,4 1:1.0 2:11.0 3:12.0 4:2.0 5:3.0";

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 3);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }

    String lineNew = FileUtils.readFileToString(tempFile).trim();
    assertEquals(lineOriginal, lineNew);
}
 
Example #24
Source File: LibSvmRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testNDArrayWritablesMultilabel() throws Exception {
    INDArray arr2 = Nd4j.zeros(2);
    arr2.putScalar(0, 11);
    arr2.putScalar(1, 12);
    INDArray arr3 = Nd4j.zeros(3);
    arr3.putScalar(0, 0);
    arr3.putScalar(1, 1);
    arr3.putScalar(2, 0);
    List<Writable> record = Arrays.asList((Writable) new DoubleWritable(1),
            new NDArrayWritable(arr2),
            new IntWritable(2),
            new DoubleWritable(3),
            new NDArrayWritable(arr3),
            new DoubleWritable(1));
    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    String lineOriginal = "2,4 1:1.0 2:11.0 3:12.0 4:2.0 5:3.0";

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setBoolean(LibSvmRecordWriter.MULTILABEL, true);
        configWriter.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 3);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }

    String lineNew = FileUtils.readFileToString(tempFile).trim();
    assertEquals(lineOriginal, lineNew);
}
 
Example #25
Source File: LibSvmRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testNDArrayWritablesZeroIndex() throws Exception {
    INDArray arr2 = Nd4j.zeros(2);
    arr2.putScalar(0, 11);
    arr2.putScalar(1, 12);
    INDArray arr3 = Nd4j.zeros(3);
    arr3.putScalar(0, 0);
    arr3.putScalar(1, 1);
    arr3.putScalar(2, 0);
    List<Writable> record = Arrays.asList((Writable) new DoubleWritable(1),
            new NDArrayWritable(arr2),
            new IntWritable(2),
            new DoubleWritable(3),
            new NDArrayWritable(arr3),
            new DoubleWritable(1));
    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    String lineOriginal = "1,3 0:1.0 1:11.0 2:12.0 3:2.0 4:3.0";

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setBoolean(LibSvmRecordWriter.ZERO_BASED_INDEXING, true); // NOT STANDARD!
        configWriter.setBoolean(LibSvmRecordWriter.ZERO_BASED_LABEL_INDEXING, true); // NOT STANDARD!
        configWriter.setBoolean(LibSvmRecordWriter.MULTILABEL, true);
        configWriter.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 3);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }

    String lineNew = FileUtils.readFileToString(tempFile).trim();
    assertEquals(lineOriginal, lineNew);
}
 
Example #26
Source File: CSVRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testWrite() throws Exception {
    File tempFile = File.createTempFile("datavec", "writer");
    tempFile.deleteOnExit();
    FileSplit fileSplit = new FileSplit(tempFile);
    CSVRecordWriter writer = new CSVRecordWriter();
    writer.initialize(fileSplit,new NumberOfRecordsPartitioner());
    List<Writable> collection = new ArrayList<>();
    collection.add(new Text("12"));
    collection.add(new Text("13"));
    collection.add(new Text("14"));

    writer.write(collection);

    CSVRecordReader reader = new CSVRecordReader(0);
    reader.initialize(new FileSplit(tempFile));
    int cnt = 0;
    while (reader.hasNext()) {
        List<Writable> line = new ArrayList<>(reader.next());
        assertEquals(3, line.size());

        assertEquals(12, line.get(0).toInt());
        assertEquals(13, line.get(1).toInt());
        assertEquals(14, line.get(2).toInt());
        cnt++;
    }
    assertEquals(1, cnt);
}
 
Example #27
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testWrite() throws Exception {
    List<List<Writable>> list = new ArrayList<>();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < 10; i++) {
        List<Writable> temp = new ArrayList<>();
        for (int j = 0; j < 3; j++) {
            int v = 100 * i + j;
            temp.add(new IntWritable(v));
            sb.append(v);
            if (j < 2)
                sb.append(",");
            else if (i != 9)
                sb.append("\n");
        }
        list.add(temp);
    }

    String expected = sb.toString();

    Path p = Files.createTempFile("csvwritetest", "csv");
    p.toFile().deleteOnExit();

    FileRecordWriter writer = new CSVRecordWriter();
    FileSplit fileSplit = new FileSplit(p.toFile());
    writer.initialize(fileSplit,new NumberOfRecordsPartitioner());
    for (List<Writable> c : list) {
        writer.write(c);
    }
    writer.close();

    //Read file back in; compare
    String fileContents = FileUtils.readFileToString(p.toFile(), FileRecordWriter.DEFAULT_CHARSET.name());

    //        System.out.println(expected);
    //        System.out.println("----------");
    //        System.out.println(fileContents);

    assertEquals(expected, fileContents);
}
 
Example #28
Source File: PartitionerTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testRecordsPerFilePartition() {
    Partitioner partitioner = new NumberOfRecordsPartitioner();
    File tmpDir = Files.createTempDir();
    FileSplit fileSplit = new FileSplit(tmpDir);
    assertTrue(fileSplit.needsBootstrapForWrite());
    fileSplit.bootStrapForWrite();
    partitioner.init(fileSplit);
    assertEquals(1,partitioner.numPartitions());
}
 
Example #29
Source File: SVMLightRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testNDArrayWritables() throws Exception {
    INDArray arr2 = Nd4j.zeros(2);
    arr2.putScalar(0, 11);
    arr2.putScalar(1, 12);
    INDArray arr3 = Nd4j.zeros(3);
    arr3.putScalar(0, 13);
    arr3.putScalar(1, 14);
    arr3.putScalar(2, 15);
    List<Writable> record = Arrays.asList((Writable) new DoubleWritable(1),
                                        new NDArrayWritable(arr2),
                                        new IntWritable(2),
                                        new DoubleWritable(3),
                                        new NDArrayWritable(arr3),
                                        new IntWritable(4));
    File tempFile = File.createTempFile("SVMLightRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    String lineOriginal = "13.0,14.0,15.0,4 1:1.0 2:11.0 3:12.0 4:2.0 5:3.0";

    try (SVMLightRecordWriter writer = new SVMLightRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 3);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }

    String lineNew = FileUtils.readFileToString(tempFile).trim();
    assertEquals(lineOriginal, lineNew);
}
 
Example #30
Source File: SVMLightRecordWriterTest.java    From deeplearning4j with Apache License 2.0
public static void executeTest(Configuration configWriter, Configuration configReader, File inputFile) throws Exception {
    File tempFile = File.createTempFile("SVMLightRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (SVMLightRecordWriter writer = new SVMLightRecordWriter()) {
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        SVMLightRecordReader rr = new SVMLightRecordReader();
        rr.initialize(configReader, new FileSplit(inputFile));
        while (rr.hasNext()) {
            List<Writable> record = rr.next();
            writer.write(record);
        }
    }

    Pattern p = Pattern.compile(String.format("%s:\\d+ ", SVMLightRecordReader.QID_PREFIX));
    List<String> linesOriginal = new ArrayList<>();
    for (String line : FileUtils.readLines(inputFile)) {
        if (!line.startsWith(SVMLightRecordReader.COMMENT_CHAR)) {
            String lineClean = line.split(SVMLightRecordReader.COMMENT_CHAR, 2)[0];
            if (lineClean.startsWith(" ")) {
                lineClean = " " + lineClean.trim();
            } else {
                lineClean = lineClean.trim();
            }
            Matcher m = p.matcher(lineClean);
            lineClean = m.replaceAll("");
            linesOriginal.add(lineClean);
        }
    }
    List<String> linesNew = FileUtils.readLines(tempFile);
    assertEquals(linesOriginal, linesNew);
}