Java Code Examples for org.datavec.api.records.reader.impl.csv.CSVRecordReader#next()

The following examples show how to use org.datavec.api.records.reader.impl.csv.CSVRecordReader#next(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: BatchInputParserMultiRecordTest.java    From konduit-serving with Apache License 2.0 6 votes vote down vote up
/**
 * Posts an Arrow file built from the iris CSV to the server and checks that
 * the verticle ended up with a parsed batch of the expected size.
 *
 * <p>A batch of 150 records is requested from {@code iris.txt}, written to a
 * temporary Arrow file, and uploaded as the multipart field {@code input1}.
 * The test expects an HTTP 200 response and a batch of exactly 150 entries.
 *
 * @param testContext context used for the assertions on the parsed batch
 * @throws Exception if reading the CSV, writing the Arrow file, or the request fails
 */
@Test(timeout = 60000)
public void runAdd(TestContext testContext) throws Exception {
    BatchInputArrowParserVerticle verticleRef = (BatchInputArrowParserVerticle) verticle;
    Schema irisInputSchema = TrainUtils.getIrisInputSchema();
    ArrowRecordWriter arrowRecordWriter = new ArrowRecordWriter(irisInputSchema);

    // Load the CSV records that will be converted to Arrow.
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new FileSplit(new ClassPathResource("iris.txt").getFile()));
    List<List<Writable>> writables = reader.next(150);

    // Serialize the records to a temporary Arrow file.
    File tmpFile = new File(temporary.getRoot(), "tmp.arrow");
    FileSplit fileSplit = new FileSplit(tmpFile);
    arrowRecordWriter.initialize(fileSplit, new NumberOfRecordsPartitioner());
    arrowRecordWriter.writeBatch(writables);

    given().port(port)
            .multiPart("input1", tmpFile)
            .when().post("/")
            .then().statusCode(200);

    // Null check comes first so a parsing failure is reported with a readable
    // message rather than a NullPointerException on the length access below.
    testContext.assertNotNull(verticleRef.getBatch(), "Inputs were null. This means parsing failed.");
    testContext.assertTrue(verticleRef.getBatch().length >= 1);
    testContext.assertEquals(150, verticleRef.getBatch().length);
}
 
Example 2
Source File: ArrowBinaryInputAdapterTest.java    From konduit-serving with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips the iris records through an Arrow file and checks that
 * {@code ArrowBinaryInputAdapter} converts the raw file bytes back into a
 * batch with the same number of records.
 *
 * @throws Exception if reading the CSV, writing the Arrow file, or the conversion fails
 */
@Test(timeout = 60000)
public void testArrowBinary() throws Exception {
    Schema irisInputSchema = TrainUtils.getIrisInputSchema();
    ArrowRecordWriter arrowRecordWriter = new ArrowRecordWriter(irisInputSchema);

    // Read the source records (a batch of 150 is requested) from the classpath CSV.
    CSVRecordReader csvReader = new CSVRecordReader();
    FileSplit csvSplit = new FileSplit(new ClassPathResource("iris.txt").getFile());
    csvReader.initialize(csvSplit);
    List<List<Writable>> records = csvReader.next(150);

    // Write them out as Arrow, then load the resulting file as raw bytes.
    File arrowFile = new File(temporary.getRoot(), "tmp.arrow");
    arrowRecordWriter.initialize(new FileSplit(arrowFile), new NumberOfRecordsPartitioner());
    arrowRecordWriter.writeBatch(records);
    Buffer arrowBuffer = Buffer.buffer(FileUtils.readFileToByteArray(arrowFile));

    // Convert the bytes back, using the same schema that was used for writing.
    ArrowBinaryInputAdapter adapter = new ArrowBinaryInputAdapter();
    ConverterArgs converterArgs = ConverterArgs.builder().schema(irisInputSchema).build();
    ArrowWritableRecordBatch converted = adapter.convert(arrowBuffer, converterArgs, null);

    assertEquals(records.size(), converted.size());
}
 
Example 3
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@code reset()} allows the same reader to be iterated
 * repeatedly: each of five passes over {@code iris.dat} is expected to yield
 * 150 records of five values each and to leave the reader exhausted.
 *
 * @throws Exception if the resource cannot be located or read
 */
@Test
public void testReset() throws Exception {
    final int passes = 5;

    CSVRecordReader csvReader = new CSVRecordReader(0, ',');
    FileSplit irisSplit = new FileSplit(new ClassPathResource("iris.dat").getFile());
    csvReader.initialize(irisSplit);

    for (int pass = 0; pass < passes; pass++) {
        int rowsSeen = 0;
        for (; csvReader.hasNext(); rowsSeen++) {
            List<Writable> row = csvReader.next();
            assertEquals(5, row.size());
        }

        // The pass must have consumed everything before the reader is rewound.
        assertFalse(csvReader.hasNext());
        assertEquals(150, rowsSeen);
        csvReader.reset();
    }
}
 
Example 4
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the record count is the same before and after
 * {@code reset()} for a reader constructed with a skip count of 10: both
 * passes over {@code datavec-api/iris.dat} are expected to yield 140 records.
 *
 * @throws Exception if the resource cannot be located or read
 */
@Test
public void testResetWithSkipLines() throws Exception {
    CSVRecordReader csvReader = new CSVRecordReader(10, ',');
    FileSplit irisSplit = new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile());
    csvReader.initialize(irisSplit);

    for (int pass = 0; pass < 2; pass++) {
        // Rewind only between the passes, never before the first one.
        if (pass > 0) {
            csvReader.reset();
        }

        int rowsSeen = 0;
        for (; csvReader.hasNext(); rowsSeen++) {
            csvReader.next();
        }
        assertEquals(140, rowsSeen);
    }
}
 
Example 5
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Iterates {@code datavec-api/iris.dat} five times with a single reader,
 * calling {@code reset()} after every pass. Each pass is expected to produce
 * 150 records with five values apiece and to end with the reader exhausted.
 *
 * @throws Exception if the resource cannot be located or read
 */
@Test
public void testReset() throws Exception {
    final int passes = 5;

    CSVRecordReader csvReader = new CSVRecordReader(0, ',');
    FileSplit irisSplit = new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile());
    csvReader.initialize(irisSplit);

    for (int pass = 0; pass < passes; pass++) {
        int rowsSeen = 0;
        for (; csvReader.hasNext(); rowsSeen++) {
            List<Writable> row = csvReader.next();
            assertEquals(5, row.size());
        }

        // Nothing may be left over once the inner loop has finished.
        assertFalse(csvReader.hasNext());
        assertEquals(150, rowsSeen);
        csvReader.reset();
    }
}
 
Example 6
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a reader told to skip as many lines as its input contains
 * reports no records, and that calling {@code next()} on it fails.
 *
 * <p>A four-line CSV is written to a temporary file and the reader is
 * constructed with a skip count of four. After {@code reset()} the reader must
 * report {@code hasNext() == false}; the final {@code next()} call is the one
 * expected to raise {@link NoSuchElementException}.
 *
 * @throws IOException if the temporary file cannot be created, written, or read
 * @throws InterruptedException if a reader call is interrupted
 */
@Test(expected = NoSuchElementException.class)
public void testCsvSkipAllLines() throws IOException, InterruptedException {
    final int numLines = 4;
    final String rowSuffix = ",one,two,three";

    // Rows look like "0,one,two,three", "1,one,two,three", ...
    List<String> lines = new ArrayList<>(numLines);
    for (int i = 0; i < numLines; i++) {
        lines.add(i + rowSuffix);
    }

    File tempFile = File.createTempFile("csvSkipLines", ".csv");
    // Do not leave the fixture behind in the temp directory after the JVM exits.
    tempFile.deleteOnExit();
    FileUtils.writeLines(tempFile, lines);

    CSVRecordReader rr = new CSVRecordReader(numLines, ',');
    rr.initialize(new FileSplit(tempFile));
    rr.reset();
    assertTrue(!rr.hasNext());
    rr.next();
}
 
Example 7
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the behavior of a reader whose skip count equals the number of lines
 * in its input: it must expose no records, and {@code next()} must fail.
 *
 * <p>The input is a four-line CSV written to a temporary file. Following
 * {@code reset()}, {@code hasNext()} has to be {@code false}; the closing
 * {@code next()} call is the one expected to raise
 * {@link NoSuchElementException}.
 *
 * @throws IOException if the temporary file cannot be created, written, or read
 * @throws InterruptedException if a reader call is interrupted
 */
@Test(expected = NoSuchElementException.class)
public void testCsvSkipAllLines() throws IOException, InterruptedException {
    final int numLines = 4;
    final String rowSuffix = ",one,two,three";

    // Each row is its index followed by the fixed suffix, e.g. "2,one,two,three".
    List<String> lines = new ArrayList<>(numLines);
    for (int i = 0; i < numLines; i++) {
        lines.add(i + rowSuffix);
    }

    File tempFile = File.createTempFile("csvSkipLines", ".csv");
    // Schedule cleanup so repeated runs do not accumulate temp files.
    tempFile.deleteOnExit();
    FileUtils.writeLines(tempFile, lines);

    CSVRecordReader rr = new CSVRecordReader(numLines, ',');
    rr.initialize(new FileSplit(tempFile));
    rr.reset();
    assertTrue(!rr.hasNext());
    rr.next();
}
 
Example 8
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies quote handling for a reader constructed with {@code ','} as the
 * delimiter and {@code '"'} as the quote character: a quoted field containing
 * a comma must stay one field, and a field written as four quote characters
 * must decode to a single quote character.
 *
 * @throws Exception if the reader cannot be initialized or read
 */
@Test
public void testWithQuotes() throws Exception {
    final String[] expectedFields = {"1", "0", "3", "Braund, Mr. Owen Harris", "male", "\""};

    CSVRecordReader reader = new CSVRecordReader(0, ',', '\"');
    reader.initialize(new StringSplit("1,0,3,\"Braund, Mr. Owen Harris\",male,\"\"\"\""));

    while (reader.hasNext()) {
        List<Writable> fields = reader.next();
        assertEquals("Entry count", 6, fields.size());

        // Compare field by field, in column order.
        for (int col = 0; col < expectedFields.length; col++) {
            assertEquals(expectedFields[col], fields.get(col).toString());
        }
    }
}
 
Example 9
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code datavec-api/issue414.csv} with {@code '|'} as the delimiter and
 * checks that every record has 10 fields and that the sixth field (index 5) of
 * successive records matches 13, 95, 15 and 25.
 *
 * @throws Exception if the resource cannot be located or read
 */
@Test
public void testPipesAsSplit() throws Exception {
    final List<Integer> sixthColumn = Arrays.asList(13, 95, 15, 25);

    CSVRecordReader reader = new CSVRecordReader(0, '|');
    FileSplit split = new FileSplit(new ClassPathResource("datavec-api/issue414.csv").getFile());
    reader.initialize(split);

    for (int row = 0; reader.hasNext(); row++) {
        List<Writable> fields = new ArrayList<>(reader.next());

        assertEquals(10, fields.size());
        // Widened to long so the comparison uses the integral assertEquals overload.
        long expected = sixthColumn.get(row);
        assertEquals(expected, fields.get(5).toInt());
    }
}
 
Example 10
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code datavec-api/tabbed.txt} with a tab as the delimiter and checks
 * that every record splits into exactly two fields.
 *
 * @throws Exception if the resource cannot be located or read
 */
@Test
public void testTabsAsSplit1() throws Exception {
    CSVRecordReader tabReader = new CSVRecordReader(0, '\t');
    FileSplit tabbedSplit = new FileSplit(new ClassPathResource("datavec-api/tabbed.txt").getFile());
    tabReader.initialize(tabbedSplit);

    while (tabReader.hasNext()) {
        // Copy the record before inspecting it, as the original test does.
        List<Writable> fields = new ArrayList<>(tabReader.next());
        assertEquals(2, fields.size());
    }
}
 
Example 11
Source File: TestStreamInputSplit.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads two small CSV files through a {@code StreamInputSplit} backed by a
 * custom stream function and checks three things: the records come back in
 * file order, the custom function received the configured URIs, and a
 * {@code reset()} makes all three records available again.
 *
 * @throws Exception if the fixture files cannot be written or the reader fails
 */
@Test
public void testCsvSimple() throws Exception {
    // Fixture: two rows in the first file, one row in the second.
    File dir = testDir.newFolder();
    File firstFile = new File(dir, "file1.txt");
    File secondFile = new File(dir, "file2.txt");
    FileUtils.writeStringToFile(firstFile, "a,b,c\nd,e,f", StandardCharsets.UTF_8);
    FileUtils.writeStringToFile(secondFile, "1,2,3", StandardCharsets.UTF_8);

    List<URI> uris = Arrays.asList(firstFile.toURI(), secondFile.toURI());

    CSVRecordReader reader = new CSVRecordReader();
    TestStreamFunction streamFn = new TestStreamFunction();
    InputSplit split = new StreamInputSplit(uris, streamFn);
    reader.initialize(split);

    // Expected records, one Text value per CSV field.
    String[][] expectedRows = {{"a", "b", "c"}, {"d", "e", "f"}, {"1", "2", "3"}};
    List<List<Writable>> expected = new ArrayList<>();
    for (String[] rowValues : expectedRows) {
        List<Writable> row = new ArrayList<>();
        for (String value : rowValues) {
            row.add(new Text(value));
        }
        expected.add(row);
    }

    List<List<Writable>> actual = new ArrayList<>();
    while (reader.hasNext()) {
        actual.add(reader.next());
    }
    assertEquals(expected, actual);

    //Check that the specified stream loading function was used, not the default:
    assertEquals(uris, streamFn.calledWithUris);

    // A reset must make the same three records readable again.
    reader.reset();
    int recordsAfterReset = 0;
    for (; reader.hasNext(); recordsAfterReset++) {
        reader.next();
    }
    assertEquals(3, recordsAfterReset);
}
 
Example 12
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a single line containing many empty fields and checks that it yields
 * 23 entries and that the last entry has a length of 1.
 *
 * @throws Exception if the reader cannot be initialized or read
 */
@Test
public void testNext() throws Exception {
    final String line = "1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,1";

    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit(line));

    while (reader.hasNext()) {
        List<Writable> entries = reader.next();
        List<Writable> copy = new ArrayList<>(entries);

        assertEquals("Entry count", 23, entries.size());

        // The trailing "1" must not pick up any extra characters.
        int lastIndex = copy.size() - 1;
        Text lastEntry = (Text) copy.get(lastIndex);
        assertEquals("Last entry garbage", 1, lastEntry.getLength());
    }
}
 
Example 13
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@code CSVRegexRecordReader} expands a column using its regex:
 * with no regex for the first column and a three-group regex for the second,
 * the two-column input is expected to produce four entries.
 *
 * @throws Exception if the reader cannot be initialized or read
 */
@Test
public void testRegex() throws Exception {
    final String[] columnRegexes = new String[] {null, "(.+) (.+) (.+)"};
    final String[] expectedEntries = {"normal", "1.2.3.4", "space", "separator"};

    CSVRecordReader reader = new CSVRegexRecordReader(0, ",", null, columnRegexes);
    reader.initialize(new StringSplit("normal,1.2.3.4 space separator"));

    while (reader.hasNext()) {
        List<Writable> entries = reader.next();
        assertEquals("Entry count", 4, entries.size());

        // Compare entry by entry, in order.
        for (int idx = 0; idx < expectedEntries.length; idx++) {
            assertEquals(expectedEntries[idx], entries.get(idx).toString());
        }
    }
}
 
Example 14
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Parses one line with a reader constructed with {@code ','} as the delimiter
 * and {@code '"'} as the quote character. The quoted name containing a comma
 * is expected to remain a single field, and the final field, written as four
 * quote characters, is expected to decode to one quote character.
 *
 * @throws Exception if the reader cannot be initialized or read
 */
@Test
public void testWithQuotes() throws Exception {
    final String[] expectedFields = {"1", "0", "3", "Braund, Mr. Owen Harris", "male", "\""};

    CSVRecordReader reader = new CSVRecordReader(0, ',', '\"');
    reader.initialize(new StringSplit("1,0,3,\"Braund, Mr. Owen Harris\",male,\"\"\"\""));

    while (reader.hasNext()) {
        List<Writable> fields = reader.next();
        assertEquals("Entry count", 6, fields.size());

        // Check each column against its expected text, left to right.
        for (int col = 0; col < expectedFields.length; col++) {
            assertEquals(expectedFields[col], fields.get(col).toString());
        }
    }
}
 
Example 15
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code issue414.csv} with {@code '|'} as the delimiter and checks that
 * every record has 10 fields and that the sixth field (index 5) of successive
 * records matches 13, 95, 15 and 25.
 *
 * @throws Exception if the resource cannot be located or read
 */
@Test
public void testPipesAsSplit() throws Exception {
    final List<Integer> sixthColumn = Arrays.asList(13, 95, 15, 25);

    CSVRecordReader reader = new CSVRecordReader(0, '|');
    FileSplit split = new FileSplit(new ClassPathResource("issue414.csv").getFile());
    reader.initialize(split);

    for (int row = 0; reader.hasNext(); row++) {
        List<Writable> fields = new ArrayList<>(reader.next());

        assertEquals(10, fields.size());
        // Widened to long so the comparison uses the integral assertEquals overload.
        long expected = sixthColumn.get(row);
        assertEquals(expected, fields.get(5).toInt());
    }
}
 
Example 16
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code /tabbed.txt} with a tab as the delimiter and checks that every
 * record splits into exactly two fields.
 *
 * @throws Exception if the resource cannot be located or read
 */
@Test
public void testTabsAsSplit1() throws Exception {
    CSVRecordReader tabReader = new CSVRecordReader(0, '\t');
    FileSplit tabbedSplit = new FileSplit(new ClassPathResource("/tabbed.txt").getFile());
    tabReader.initialize(tabbedSplit);

    while (tabReader.hasNext()) {
        // Copy the record before inspecting it, as the original test does.
        List<Writable> fields = new ArrayList<>(tabReader.next());
        assertEquals(2, fields.size());
    }
}
 
Example 17
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks regex-based column expansion in {@code CSVRegexRecordReader}: the
 * first column has no regex, the second uses a three-group regex, and the
 * two-column input line is expected to come back as four entries.
 *
 * @throws Exception if the reader cannot be initialized or read
 */
@Test
public void testRegex() throws Exception {
    final String[] columnRegexes = new String[] {null, "(.+) (.+) (.+)"};
    final String[] expectedEntries = {"normal", "1.2.3.4", "space", "separator"};

    CSVRecordReader reader = new CSVRegexRecordReader(0, ",", null, columnRegexes);
    reader.initialize(new StringSplit("normal,1.2.3.4 space separator"));

    while (reader.hasNext()) {
        List<Writable> entries = reader.next();
        assertEquals("Entry count", 4, entries.size());

        // Walk the entries in order and compare their text.
        for (int idx = 0; idx < expectedEntries.length; idx++) {
            assertEquals(expectedEntries[idx], entries.get(idx).toString());
        }
    }
}
 
Example 18
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a line that ends in a run of empty fields and checks that all 23
 * entries are reported, including the trailing empty ones.
 *
 * @throws Exception if the reader cannot be initialized or read
 */
@Test
public void testEmptyEntries() throws Exception {
    final String line = "1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,";

    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit(line));

    while (reader.hasNext()) {
        assertEquals("Entry count", 23, reader.next().size());
    }
}
 
Example 19
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one line with many consecutive empty fields and checks that it is
 * split into 23 entries, the last of which has a length of 1.
 *
 * @throws Exception if the reader cannot be initialized or read
 */
@Test
public void testNext() throws Exception {
    final String line = "1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,1";

    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit(line));

    while (reader.hasNext()) {
        List<Writable> entries = reader.next();
        List<Writable> copy = new ArrayList<>(entries);

        assertEquals("Entry count", 23, entries.size());

        // Only the single character "1" is expected in the final entry.
        int lastIndex = copy.size() - 1;
        Text lastEntry = (Text) copy.get(lastIndex);
        assertEquals("Last entry garbage", 1, lastEntry.getLength());
    }
}
 
Example 20
Source File: CSVRecordWriterTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one three-value record with {@code CSVRecordWriter} and reads the
 * file back with {@code CSVRecordReader}, expecting exactly one record whose
 * values parse as 12, 13 and 14.
 *
 * <p>The writer is closed before the file is read so the reader does not
 * depend on output the writer may still be holding, and the reader is closed
 * when the test finishes.
 *
 * @throws Exception if the temporary file cannot be created, written, or read
 */
@Test
public void testWrite() throws Exception {
    File tempFile = File.createTempFile("datavec", "writer");
    tempFile.deleteOnExit();

    List<Writable> collection = new ArrayList<>();
    collection.add(new Text("12"));
    collection.add(new Text("13"));
    collection.add(new Text("14"));

    CSVRecordWriter writer = new CSVRecordWriter();
    writer.initialize(new FileSplit(tempFile), new NumberOfRecordsPartitioner());
    try {
        writer.write(collection);
    } finally {
        // Release the output before the file is opened for reading.
        writer.close();
    }

    CSVRecordReader reader = new CSVRecordReader(0);
    reader.initialize(new FileSplit(tempFile));
    try {
        int cnt = 0;
        while (reader.hasNext()) {
            List<Writable> line = new ArrayList<>(reader.next());
            assertEquals(3, line.size());

            assertEquals(12, line.get(0).toInt());
            assertEquals(13, line.get(1).toInt());
            assertEquals(14, line.get(2).toInt());
            cnt++;
        }
        assertEquals(1, cnt);
    } finally {
        reader.close();
    }
}