Java Code Examples for org.datavec.api.records.reader.impl.csv.CSVRecordReader#initialize()

The following examples show how to use org.datavec.api.records.reader.impl.csv.CSVRecordReader#initialize(). Each example is taken from an open source project; the source file, project, and license are noted above the code.
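Before the project examples, here is a minimal usage sketch of the typical pattern: construct the reader, bind it to an InputSplit via initialize(), then iterate with hasNext()/next(). This is an illustrative sketch only; the file path, skip count, and delimiter below are placeholders, not values taken from any of the projects listed.

import org.datavec.api.records.reader.impl.csv.CSVRecordReader;
import org.datavec.api.split.FileSplit;
import org.datavec.api.writable.Writable;

import java.io.File;
import java.util.List;

public class CsvRecordReaderUsageSketch {
    public static void main(String[] args) throws Exception {
        // Skip no header lines and split fields on ',' (placeholder settings).
        CSVRecordReader reader = new CSVRecordReader(0, ',');

        // initialize() binds the reader to an InputSplit; here a FileSplit over a local file.
        // "data/iris.csv" is a hypothetical path used only for illustration.
        reader.initialize(new FileSplit(new File("data/iris.csv")));

        // Each call to next() returns one parsed CSV line as a List<Writable>.
        while (reader.hasNext()) {
            List<Writable> record = reader.next();
            System.out.println(record);
        }
        reader.close();
    }
}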
Example 1
Source File: BatchInputParserMultiRecordTest.java    From konduit-serving with Apache License 2.0
@Test(timeout = 60000)
public void runAdd(TestContext testContext) throws Exception {
    BatchInputArrowParserVerticle verticleRef = (BatchInputArrowParserVerticle) verticle;
    Schema irisInputSchema = TrainUtils.getIrisInputSchema();
    ArrowRecordWriter arrowRecordWriter = new ArrowRecordWriter(irisInputSchema);
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new FileSplit(new ClassPathResource("iris.txt").getFile()));
    List<List<Writable>> writables = reader.next(150);

    File tmpFile = new File(temporary.getRoot(), "tmp.arrow");
    FileSplit fileSplit = new FileSplit(tmpFile);
    arrowRecordWriter.initialize(fileSplit, new NumberOfRecordsPartitioner());
    arrowRecordWriter.writeBatch(writables);

    given().port(port)
            .multiPart("input1", tmpFile)
            .when().post("/")
            .then().statusCode(200);

    testContext.assertNotNull(verticleRef.getBatch(), "Inputs were null. This means parsing failed.");
    testContext.assertTrue(verticleRef.getBatch().length >= 1);
    testContext.assertNotNull(verticleRef.getBatch());
    testContext.assertEquals(150, verticleRef.getBatch().length);
}
 
Example 2
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0
@Test(expected = NoSuchElementException.class)
public void testCsvSkipAllLines() throws IOException, InterruptedException {
    final int numLines = 4;
    final List<Writable> lineList = Arrays.asList((Writable) new IntWritable(numLines - 1),
                    (Writable) new Text("one"), (Writable) new Text("two"), (Writable) new Text("three"));
    String header = ",one,two,three";
    List<String> lines = new ArrayList<>();
    for (int i = 0; i < numLines; i++)
        lines.add(Integer.toString(i) + header);
    File tempFile = File.createTempFile("csvSkipLines", ".csv");
    FileUtils.writeLines(tempFile, lines);

    CSVRecordReader rr = new CSVRecordReader(numLines, ',');
    rr.initialize(new FileSplit(tempFile));
    rr.reset();
    assertFalse(rr.hasNext());
    rr.next();
}
 
Example 3
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testStreamReset() throws Exception {
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new InputStreamInputSplit(new ClassPathResource("datavec-api/iris.dat").getInputStream()));

    int count = 0;
    while(rr.hasNext()){
        assertNotNull(rr.next());
        count++;
    }
    assertEquals(150, count);

    assertFalse(rr.resetSupported());

    try{
        rr.reset();
        fail("Expected exception");
    } catch (Exception e){
        String msg = e.getMessage();
        String msg2 = e.getCause().getMessage();
        assertTrue(msg, msg.contains("Error during LineRecordReader reset"));
        assertTrue(msg2, msg2.contains("Reset not supported from streams"));
    }
}
 
Example 4
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0
@Test
public void testCsvSkipAllButOneLine() throws IOException, InterruptedException {
    final int numLines = 4;
    final List<Writable> lineList = Arrays.<Writable>asList(new Text(Integer.toString(numLines - 1)),
            new Text("one"), new Text("two"), new Text("three"));
    String header = ",one,two,three";
    List<String> lines = new ArrayList<>();
    for (int i = 0; i < numLines; i++)
        lines.add(Integer.toString(i) + header);
    File tempFile = File.createTempFile("csvSkipLines", ".csv");
    FileUtils.writeLines(tempFile, lines);

    CSVRecordReader rr = new CSVRecordReader(numLines - 1, ',');
    rr.initialize(new FileSplit(tempFile));
    rr.reset();
    assertTrue(rr.hasNext());
    assertEquals(rr.next(), lineList);
}
 
Example 5
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0
@Test
public void testResetWithSkipLines() throws Exception {
    CSVRecordReader rr = new CSVRecordReader(10, ',');
    rr.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));
    int lineCount = 0;
    while (rr.hasNext()) {
        rr.next();
        ++lineCount;
    }
    assertEquals(140, lineCount);
    rr.reset();
    lineCount = 0;
    while (rr.hasNext()) {
        rr.next();
        ++lineCount;
    }
    assertEquals(140, lineCount);
}
 
Example 6
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0
@Test
public void testReset() throws Exception {
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    int nResets = 5;
    for (int i = 0; i < nResets; i++) {

        int lineCount = 0;
        while (rr.hasNext()) {
            List<Writable> line = rr.next();
            assertEquals(5, line.size());
            lineCount++;
        }
        assertFalse(rr.hasNext());
        assertEquals(150, lineCount);
        rr.reset();
    }
}
 
Example 7
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testCsvSkipAllButOneLine() throws IOException, InterruptedException {
    final int numLines = 4;
    final List<Writable> lineList = Arrays.<Writable>asList(new Text(Integer.toString(numLines - 1)),
            new Text("one"), new Text("two"), new Text("three"));
    String header = ",one,two,three";
    List<String> lines = new ArrayList<>();
    for (int i = 0; i < numLines; i++)
        lines.add(Integer.toString(i) + header);
    File tempFile = File.createTempFile("csvSkipLines", ".csv");
    FileUtils.writeLines(tempFile, lines);

    CSVRecordReader rr = new CSVRecordReader(numLines - 1, ',');
    rr.initialize(new FileSplit(tempFile));
    rr.reset();
    assertTrue(rr.hasNext());
    assertEquals(rr.next(), lineList);
}
 
Example 8
Source File: DataSetIteratorTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testMnist() throws Exception {
    ClassPathResource cpr = new ClassPathResource("mnist_first_200.txt");
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new FileSplit(cpr.getTempFileFromArchive()));
    RecordReaderDataSetIterator dsi = new RecordReaderDataSetIterator(rr, 10, 0, 10);

    MnistDataSetIterator iter = new MnistDataSetIterator(10, 200, false, true, false, 0);

    while (dsi.hasNext()) {
        DataSet dsExp = dsi.next();
        DataSet dsAct = iter.next();

        INDArray fExp = dsExp.getFeatures();
        fExp.divi(255);
        INDArray lExp = dsExp.getLabels();

        INDArray fAct = dsAct.getFeatures();
        INDArray lAct = dsAct.getLabels();

        assertEquals(fExp, fAct.castTo(fExp.dataType()));
        assertEquals(lExp, lAct.castTo(lExp.dataType()));
    }
    assertFalse(iter.hasNext());
}
 
Example 9
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0
@Test
public void testStreamReset() throws Exception {
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new InputStreamInputSplit(new ClassPathResource("iris.dat").getInputStream()));

    int count = 0;
    while(rr.hasNext()){
        assertNotNull(rr.next());
        count++;
    }
    assertEquals(150, count);

    assertFalse(rr.resetSupported());

    try{
        rr.reset();
        fail("Expected exception");
    } catch (Exception e){
        e.printStackTrace();
    }
}
 
Example 10
Source File: TestConcatenatingRecordReader.java    From deeplearning4j with Apache License 2.0
@Test
public void test() throws Exception {

    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));

    CSVRecordReader rr2 = new CSVRecordReader(0, ',');
    rr2.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));

    RecordReader rrC = new ConcatenatingRecordReader(rr, rr2);

    int count = 0;
    while(rrC.hasNext()){
        rrC.next();
        count++;
    }

    assertEquals(300, count);
}
 
Example 11
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testReset() throws Exception {
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));

    int nResets = 5;
    for (int i = 0; i < nResets; i++) {

        int lineCount = 0;
        while (rr.hasNext()) {
            List<Writable> line = rr.next();
            assertEquals(5, line.size());
            lineCount++;
        }
        assertFalse(rr.hasNext());
        assertEquals(150, lineCount);
        rr.reset();
    }
}
 
Example 12
Source File: LocalTransformProcessRecordReaderTests.java    From deeplearning4j with Apache License 2.0
@Test
public void simpleTransformTest() throws Exception {
    Schema schema = new Schema.Builder().addColumnDouble("0").addColumnDouble("1").addColumnDouble("2")
            .addColumnDouble("3").addColumnDouble("4").build();
    TransformProcess transformProcess = new TransformProcess.Builder(schema).removeColumns("0").build();
    CSVRecordReader csvRecordReader = new CSVRecordReader();
    csvRecordReader.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));
    LocalTransformProcessRecordReader transformProcessRecordReader =
            new LocalTransformProcessRecordReader(csvRecordReader, transformProcess);
    assertEquals(4, transformProcessRecordReader.next().size());

}
 
Example 13
Source File: TestLineRecordReaderFunction.java    From deeplearning4j with Apache License 2.0
@Test
public void testLineRecordReader() throws Exception {

    File dataFile = new ClassPathResource("iris.dat").getFile();
    List<String> lines = FileUtils.readLines(dataFile);

    JavaSparkContext sc = getContext();
    JavaRDD<String> linesRdd = sc.parallelize(lines);

    CSVRecordReader rr = new CSVRecordReader(0, ',');

    JavaRDD<List<Writable>> out = linesRdd.map(new LineRecordReaderFunction(rr));
    List<List<Writable>> outList = out.collect();


    CSVRecordReader rr2 = new CSVRecordReader(0, ',');
    rr2.initialize(new FileSplit(dataFile));
    Set<List<Writable>> expectedSet = new HashSet<>();
    int totalCount = 0;
    while (rr2.hasNext()) {
        expectedSet.add(rr2.next());
        totalCount++;
    }

    assertEquals(totalCount, outList.size());

    for (List<Writable> line : outList) {
        assertTrue(expectedSet.contains(line));
    }
}
 
Example 14
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testPipesAsSplit() throws Exception {

    CSVRecordReader reader = new CSVRecordReader(0, '|');
    reader.initialize(new FileSplit(new ClassPathResource("datavec-api/issue414.csv").getFile()));
    int lineidx = 0;
    List<Integer> sixthColumn = Arrays.asList(13, 95, 15, 25);
    while (reader.hasNext()) {
        List<Writable> list = new ArrayList<>(reader.next());

        assertEquals(10, list.size());
        assertEquals((long)sixthColumn.get(lineidx), list.get(5).toInt());
        lineidx++;
    }
}
 
Example 15
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0
@Test
public void testEmptyEntries() throws Exception {
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit("1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,"));
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        assertEquals("Entry count", 23, vals.size());
    }
}
 
Example 16
Source File: CSVRecordWriterTest.java    From DataVec with Apache License 2.0
@Test
public void testWrite() throws Exception {
    File tempFile = File.createTempFile("datavec", "writer");
    tempFile.deleteOnExit();
    FileSplit fileSplit = new FileSplit(tempFile);
    CSVRecordWriter writer = new CSVRecordWriter();
    writer.initialize(fileSplit,new NumberOfRecordsPartitioner());
    List<Writable> collection = new ArrayList<>();
    collection.add(new Text("12"));
    collection.add(new Text("13"));
    collection.add(new Text("14"));

    writer.write(collection);

    CSVRecordReader reader = new CSVRecordReader(0);
    reader.initialize(new FileSplit(tempFile));
    int cnt = 0;
    while (reader.hasNext()) {
        List<Writable> line = new ArrayList<>(reader.next());
        assertEquals(3, line.size());

        assertEquals(12, line.get(0).toInt());
        assertEquals(13, line.get(1).toInt());
        assertEquals(14, line.get(2).toInt());
        cnt++;
    }
    assertEquals(1, cnt);
}
 
Example 17
Source File: LocalTransformProcessRecordReaderTests.java    From DataVec with Apache License 2.0
@Test
public void simpleTransformTest() throws Exception {
    Schema schema = new Schema.Builder().addColumnDouble("0").addColumnDouble("1").addColumnDouble("2")
            .addColumnDouble("3").addColumnDouble("4").build();
    TransformProcess transformProcess = new TransformProcess.Builder(schema).removeColumns("0").build();
    CSVRecordReader csvRecordReader = new CSVRecordReader();
    csvRecordReader.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));
    LocalTransformProcessRecordReader transformProcessRecordReader =
            new LocalTransformProcessRecordReader(csvRecordReader, transformProcess);
    assertEquals(4, transformProcessRecordReader.next().size());

}
 
Example 18
Source File: CSVVariableSlidingWindowRecordReaderTest.java    From DataVec with Apache License 2.0
@Test
public void testCSVVariableSlidingWindowRecordReaderStride() throws Exception {
    int maxLinesPerSequence = 3;
    int stride = 2;

    SequenceRecordReader seqRR = new CSVVariableSlidingWindowRecordReader(maxLinesPerSequence, stride);
    seqRR.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    CSVRecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    int count = 0;
    while (seqRR.hasNext()) {
        List<List<Writable>> next = seqRR.sequenceRecord();

        if(count==maxLinesPerSequence-1) {
            LinkedList<List<Writable>> expected = new LinkedList<>();
            for(int s = 0; s < stride; s++) {
                expected = new LinkedList<>();
                for (int i = 0; i < maxLinesPerSequence; i++) {
                    expected.addFirst(rr.next());
                }
            }
            assertEquals(expected, next);

        }
        if(count==maxLinesPerSequence) {
            assertEquals(maxLinesPerSequence, next.size());
        }
        if(count==0) { // first seq should be length 2
            assertEquals(2, next.size());
        }
        if(count>151) { // last seq should be length 1
            assertEquals(1, next.size());
        }

        count++;
    }

    assertEquals(76, count);
}
 
Example 19
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testMeta() throws Exception {
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));

    int lineCount = 0;
    List<RecordMetaData> metaList = new ArrayList<>();
    List<List<Writable>> writables = new ArrayList<>();
    while (rr.hasNext()) {
        Record r = rr.nextRecord();
        assertEquals(5, r.getRecord().size());
        lineCount++;
        RecordMetaData meta = r.getMetaData();
        //            System.out.println(r.getRecord() + "\t" + meta.getLocation() + "\t" + meta.getURI());

        metaList.add(meta);
        writables.add(r.getRecord());
    }
    assertFalse(rr.hasNext());
    assertEquals(150, lineCount);
    rr.reset();


    System.out.println("\n\n\n--------------------------------");
    List<Record> contents = rr.loadFromMetaData(metaList);
    assertEquals(150, contents.size());
    //        for(Record r : contents ){
    //            System.out.println(r);
    //        }

    List<RecordMetaData> meta2 = new ArrayList<>();
    meta2.add(metaList.get(100));
    meta2.add(metaList.get(90));
    meta2.add(metaList.get(80));
    meta2.add(metaList.get(70));
    meta2.add(metaList.get(60));

    List<Record> contents2 = rr.loadFromMetaData(meta2);
    assertEquals(writables.get(100), contents2.get(0).getRecord());
    assertEquals(writables.get(90), contents2.get(1).getRecord());
    assertEquals(writables.get(80), contents2.get(2).getRecord());
    assertEquals(writables.get(70), contents2.get(3).getRecord());
    assertEquals(writables.get(60), contents2.get(4).getRecord());
}
 
Example 20
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0
@Test
public void testMeta() throws Exception {
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    int lineCount = 0;
    List<RecordMetaData> metaList = new ArrayList<>();
    List<List<Writable>> writables = new ArrayList<>();
    while (rr.hasNext()) {
        Record r = rr.nextRecord();
        assertEquals(5, r.getRecord().size());
        lineCount++;
        RecordMetaData meta = r.getMetaData();
        //            System.out.println(r.getRecord() + "\t" + meta.getLocation() + "\t" + meta.getURI());

        metaList.add(meta);
        writables.add(r.getRecord());
    }
    assertFalse(rr.hasNext());
    assertEquals(150, lineCount);
    rr.reset();


    System.out.println("\n\n\n--------------------------------");
    List<Record> contents = rr.loadFromMetaData(metaList);
    assertEquals(150, contents.size());
    //        for(Record r : contents ){
    //            System.out.println(r);
    //        }

    List<RecordMetaData> meta2 = new ArrayList<>();
    meta2.add(metaList.get(100));
    meta2.add(metaList.get(90));
    meta2.add(metaList.get(80));
    meta2.add(metaList.get(70));
    meta2.add(metaList.get(60));

    List<Record> contents2 = rr.loadFromMetaData(meta2);
    assertEquals(writables.get(100), contents2.get(0).getRecord());
    assertEquals(writables.get(90), contents2.get(1).getRecord());
    assertEquals(writables.get(80), contents2.get(2).getRecord());
    assertEquals(writables.get(70), contents2.get(3).getRecord());
    assertEquals(writables.get(60), contents2.get(4).getRecord());
}