Java Code Examples for org.datavec.api.records.reader.impl.csv.CSVRecordReader#hasNext()

The following examples show how to use org.datavec.api.records.reader.impl.csv.CSVRecordReader#hasNext(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the comma-delimited "iris.dat" classpath resource to exhaustion five times,
 * calling {@code reset()} after every pass. Each pass must yield 150 records of 5 values.
 */
@Test
public void testReset() throws Exception {
    FileSplit irisSplit = new FileSplit(new ClassPathResource("iris.dat").getFile());
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(irisSplit);

    final int nResets = 5;
    int pass = 0;
    while (pass < nResets) {
        int recordsSeen = 0;
        for (; rr.hasNext(); recordsSeen++) {
            List<Writable> record = rr.next();
            assertEquals(5, record.size());
        }
        // The reader must report exhaustion before it is rewound for the next pass.
        assertFalse(rr.hasNext());
        assertEquals(150, recordsSeen);
        rr.reset();
        pass++;
    }
}
 
Example 2
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Reads "iris.dat" through an {@code InputStreamInputSplit}, checks that 150 non-null
 * records are produced, that {@code resetSupported()} reports false, and that calling
 * {@code reset()} on the stream-backed reader throws.
 */
@Test
public void testStreamReset() throws Exception {
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new InputStreamInputSplit(new ClassPathResource("iris.dat").getInputStream()));

    int count = 0;
    while (rr.hasNext()) {
        assertNotNull(rr.next());
        count++;
    }
    assertEquals(150, count);

    assertFalse(rr.resetSupported());

    try {
        rr.reset();
        // Reaching this line means reset() did not throw, which is a failure.
        // JUnit's fail() raises an AssertionError (an Error), so the catch below does not swallow it.
        fail("Expected exception");
    } catch (Exception ignored) {
        // Expected: reset() is not supported for this split, so the exception is the
        // success path. Deliberately not printed to keep passing test output clean.
    }
}
 
Example 3
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Parses one CSV line with a {@code CSVRegexRecordReader} whose second column carries a
 * three-group regex, and checks that the line comes back as four separate values.
 */
@Test
public void testRegex() throws Exception {
    String[] columnPatterns = new String[] {null, "(.+) (.+) (.+)"};
    CSVRecordReader reader = new CSVRegexRecordReader(0, ",", null, columnPatterns);
    reader.initialize(new StringSplit("normal,1.2.3.4 space separator"));

    String[] expected = {"normal", "1.2.3.4", "space", "separator"};
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        assertEquals("Entry count", 4, vals.size());
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], vals.get(i).toString());
        }
    }
}
 
Example 4
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads "datavec-api/tabbed.txt" with a tab delimiter and checks that every record
 * holds exactly two values.
 */
@Test
public void testTabsAsSplit1() throws Exception {
    FileSplit tabbedSplit = new FileSplit(new ClassPathResource("datavec-api/tabbed.txt").getFile());
    CSVRecordReader reader = new CSVRecordReader(0, '\t');
    reader.initialize(tabbedSplit);

    for (; reader.hasNext(); ) {
        List<Writable> row = new ArrayList<>(reader.next());
        assertEquals(2, row.size());
    }
}
 
Example 5
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds a single line to a {@code CSVRegexRecordReader} configured with a three-group
 * regex on the second column, and asserts the four resulting values in order.
 */
@Test
public void testRegex() throws Exception {
    String[] regexPerColumn = new String[] {null, "(.+) (.+) (.+)"};
    CSVRecordReader reader = new CSVRegexRecordReader(0, ",", null, regexPerColumn);
    reader.initialize(new StringSplit("normal,1.2.3.4 space separator"));

    String[] wanted = {"normal", "1.2.3.4", "space", "separator"};
    while (reader.hasNext()) {
        List<Writable> fields = reader.next();
        assertEquals("Entry count", 4, fields.size());
        int col = 0;
        while (col < wanted.length) {
            assertEquals(wanted[col], fields.get(col).toString());
            col++;
        }
    }
}
 
Example 6
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads "datavec-api/issue414.csv" with '|' as the delimiter. Every record must have ten
 * values, and the value at index 5 must match the expected list row by row.
 */
@Test
public void testPipesAsSplit() throws Exception {
    List<Integer> sixthColumn = Arrays.asList(13, 95, 15, 25);

    CSVRecordReader reader = new CSVRecordReader(0, '|');
    reader.initialize(new FileSplit(new ClassPathResource("datavec-api/issue414.csv").getFile()));

    for (int row = 0; reader.hasNext(); row++) {
        List<Writable> fields = new ArrayList<>(reader.next());

        assertEquals(10, fields.size());
        // Unbox and widen to long so the comparison is done on primitive longs.
        long expectedSixth = sixthColumn.get(row);
        assertEquals(expectedSixth, fields.get(5).toInt());
    }
}
 
Example 7
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a line made up mostly of empty fields, ending in an empty field,
 * is read as 23 values.
 */
@Test
public void testEmptyEntries() throws Exception {
    String sparseLine = "1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,";
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit(sparseLine));

    while (reader.hasNext()) {
        assertEquals("Entry count", 23, reader.next().size());
    }
}
 
Example 8
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a line with many empty fields and a final "1", then checks that the record has
 * 23 values and that the last one is a {@code Text} of length 1.
 */
@Test
public void testNext() throws Exception {
    String sparseLine = "1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,1";
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit(sparseLine));

    while (reader.hasNext()) {
        List<Writable> record = reader.next();
        List<Writable> copy = new ArrayList<>(record);

        assertEquals("Entry count", 23, record.size());
        int lastIndex = copy.size() - 1;
        Text tail = (Text) copy.get(lastIndex);
        assertEquals("Last entry garbage", 1, tail.getLength());
    }
}
 
Example 9
Source File: CSVRecordWriterTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips one record through {@code CSVRecordWriter} and {@code CSVRecordReader}:
 * writes the values "12", "13", "14" to a temporary file, then reads the file back and
 * checks that exactly one record with those three integer values is produced.
 */
@Test
public void testWrite() throws Exception {
    File tempFile = File.createTempFile("datavec", "writer");
    tempFile.deleteOnExit();
    FileSplit fileSplit = new FileSplit(tempFile);

    List<Writable> collection = new ArrayList<>();
    collection.add(new Text("12"));
    collection.add(new Text("13"));
    collection.add(new Text("14"));

    CSVRecordWriter writer = new CSVRecordWriter();
    try {
        writer.initialize(fileSplit, new NumberOfRecordsPartitioner());
        writer.write(collection);
    } finally {
        // Release the writer before reading the file back, so the underlying handle is not
        // leaked and any buffered output reaches disk.
        // NOTE(review): relies on RecordWriter exposing close() (Closeable) - confirm.
        writer.close();
    }

    CSVRecordReader reader = new CSVRecordReader(0);
    reader.initialize(new FileSplit(tempFile));
    int cnt = 0;
    while (reader.hasNext()) {
        List<Writable> line = new ArrayList<>(reader.next());
        assertEquals(3, line.size());

        assertEquals(12, line.get(0).toInt());
        assertEquals(13, line.get(1).toInt());
        assertEquals(14, line.get(2).toInt());
        cnt++;
    }
    assertEquals(1, cnt);
}
 
Example 10
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a line using '"' as the quote character and checks that a quoted field
 * containing a comma stays as one value, and that the trailing quoted field reads
 * back as a single double-quote character.
 */
@Test
public void testWithQuotes() throws Exception {
    CSVRecordReader reader = new CSVRecordReader(0, ',', '\"');
    reader.initialize(new StringSplit("1,0,3,\"Braund, Mr. Owen Harris\",male,\"\"\"\""));

    String[] expected = {"1", "0", "3", "Braund, Mr. Owen Harris", "male", "\""};
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        assertEquals("Entry count", 6, vals.size());
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], vals.get(i).toString());
        }
    }
}
 
Example 11
Source File: TestLineRecordReaderFunction.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Maps every line of "iris.dat" through {@code LineRecordReaderFunction} on a Spark RDD
 * and checks the collected output against the records a {@code CSVRecordReader} yields
 * when reading the same file directly: same count, and each output record is present
 * in the expected set.
 */
@Test
public void testLineRecordReader() throws Exception {

    File dataFile = new ClassPathResource("iris.dat").getFile();
    List<String> lines = FileUtils.readLines(dataFile);

    // Spark side: parse each raw line with an (uninitialized) CSV reader.
    JavaRDD<String> linesRdd = getContext().parallelize(lines);
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    List<List<Writable>> outList = linesRdd.map(new LineRecordReaderFunction(rr)).collect();

    // Reference side: read the same file straight through a second reader.
    CSVRecordReader rr2 = new CSVRecordReader(0, ',');
    rr2.initialize(new FileSplit(dataFile));
    Set<List<Writable>> expectedSet = new HashSet<>();
    int totalCount = 0;
    for (; rr2.hasNext(); totalCount++) {
        expectedSet.add(rr2.next());
    }

    assertEquals(totalCount, outList.size());

    for (List<Writable> parsed : outList) {
        assertTrue(expectedSet.contains(parsed));
    }
}
 
Example 12
Source File: TestLineRecordReaderFunction.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Applies {@code LineRecordReaderFunction} to every line of "iris.dat" through a Java
 * stream and checks the result against the records a {@code CSVRecordReader} yields when
 * reading the same file directly: same count, and each output record is present in the
 * expected set.
 */
@Test
public void testLineRecordReader() throws Exception {

    File dataFile = new ClassPathResource("iris.dat").getFile();
    List<String> lines = FileUtils.readLines(dataFile);

    CSVRecordReader rr = new CSVRecordReader(0, ',');

    // A fresh function instance is built for each line, as in the original pipeline.
    List<List<Writable>> outList = lines.stream()
            .map(input -> new LineRecordReaderFunction(rr).apply(input))
            .collect(Collectors.toList());

    // Reference records, read directly from the file.
    CSVRecordReader rr2 = new CSVRecordReader(0, ',');
    rr2.initialize(new FileSplit(dataFile));
    Set<List<Writable>> expectedSet = new HashSet<>();
    int totalCount = 0;
    for (; rr2.hasNext(); totalCount++) {
        expectedSet.add(rr2.next());
    }

    assertEquals(totalCount, outList.size());

    for (List<Writable> parsed : outList) {
        assertTrue(expectedSet.contains(parsed));
    }
}
 
Example 13
Source File: TestStreamInputSplit.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Writes two small CSV files, reads them through a {@code StreamInputSplit} backed by a
 * {@code TestStreamFunction}, and checks the three records come back in order. Also
 * verifies that the supplied stream function was called with the given URIs, and that
 * the reader yields three records again after {@code reset()}.
 */
@Test
public void testCsvSimple() throws Exception {
    File dir = testDir.newFolder();
    File f1 = new File(dir, "file1.txt");
    File f2 = new File(dir, "file2.txt");

    FileUtils.writeStringToFile(f1, "a,b,c\nd,e,f", StandardCharsets.UTF_8);
    FileUtils.writeStringToFile(f2, "1,2,3", StandardCharsets.UTF_8);

    List<URI> uris = Arrays.asList(f1.toURI(), f2.toURI());

    TestStreamFunction fn = new TestStreamFunction();
    CSVRecordReader rr = new CSVRecordReader();
    InputSplit is = new StreamInputSplit(uris, fn);
    rr.initialize(is);

    // Expected records: two rows from file1 followed by one row from file2.
    List<Writable> rowAbc = Arrays.<Writable>asList(new Text("a"), new Text("b"), new Text("c"));
    List<Writable> rowDef = Arrays.<Writable>asList(new Text("d"), new Text("e"), new Text("f"));
    List<Writable> row123 = Arrays.<Writable>asList(new Text("1"), new Text("2"), new Text("3"));
    List<List<Writable>> exp = new ArrayList<>();
    exp.add(rowAbc);
    exp.add(rowDef);
    exp.add(row123);

    List<List<Writable>> act = new ArrayList<>();
    for (; rr.hasNext(); ) {
        act.add(rr.next());
    }

    assertEquals(exp, act);

    // The custom stream loading function, not the default, must have opened the URIs.
    assertEquals(uris, fn.calledWithUris);

    // Second pass after reset: only the record count is checked.
    rr.reset();
    int count = 0;
    for (; rr.hasNext(); rr.next()) {
        count++;
    }
    assertEquals(3, count);
}
 
Example 14
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the "/tabbed.txt" classpath resource with a tab delimiter and checks that
 * every record holds exactly two values.
 */
@Test
public void testTabsAsSplit1() throws Exception {
    FileSplit tabbedSplit = new FileSplit(new ClassPathResource("/tabbed.txt").getFile());
    CSVRecordReader reader = new CSVRecordReader(0, '\t');
    reader.initialize(tabbedSplit);

    for (; reader.hasNext(); ) {
        List<Writable> row = new ArrayList<>(reader.next());
        assertEquals(2, row.size());
    }
}
 
Example 15
Source File: TestLineRecordReaderFunction.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Parses each line of "iris.dat" on a Spark RDD with {@code LineRecordReaderFunction}
 * and compares the collected records with those a {@code CSVRecordReader} produces when
 * reading the file directly: the counts must match and every output record must be in
 * the expected set.
 */
@Test
public void testLineRecordReader() throws Exception {

    File dataFile = new ClassPathResource("iris.dat").getFile();
    List<String> rawLines = FileUtils.readLines(dataFile);

    JavaSparkContext sc = getContext();
    JavaRDD<String> linesRdd = sc.parallelize(rawLines);

    CSVRecordReader rr = new CSVRecordReader(0, ',');
    List<List<Writable>> outList = linesRdd.map(new LineRecordReaderFunction(rr)).collect();

    // Build the reference set from a plain file read.
    CSVRecordReader rr2 = new CSVRecordReader(0, ',');
    rr2.initialize(new FileSplit(dataFile));
    Set<List<Writable>> expectedSet = new HashSet<>();
    int totalCount = 0;
    for (; rr2.hasNext(); totalCount++) {
        expectedSet.add(rr2.next());
    }

    assertEquals(totalCount, outList.size());

    for (List<Writable> parsed : outList) {
        assertTrue(expectedSet.contains(parsed));
    }
}
 
Example 16
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a line containing many consecutive empty fields, including a trailing
 * empty field, is parsed into 23 values.
 */
@Test
public void testEmptyEntries() throws Exception {
    StringSplit sparseInput = new StringSplit("1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,");
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(sparseInput);

    while (reader.hasNext()) {
        assertEquals("Entry count", 23, reader.next().size());
    }
}
 
Example 17
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a line with many empty fields and a final "1", asserting 23 values in total
 * and that the last value is a {@code Text} whose length is 1.
 */
@Test
public void testNext() throws Exception {
    StringSplit sparseInput = new StringSplit("1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,1");
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(sparseInput);

    while (reader.hasNext()) {
        List<Writable> record = reader.next();
        List<Writable> copy = new ArrayList<>(record);

        assertEquals("Entry count", 23, record.size());
        int lastIndex = copy.size() - 1;
        Text tail = (Text) copy.get(lastIndex);
        assertEquals("Last entry garbage", 1, tail.getLength());
    }
}
 
Example 18
Source File: CSVRecordWriterTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips one record through {@code CSVRecordWriter} and {@code CSVRecordReader}:
 * writes the values "12", "13", "14" to a temporary file, then reads the file back and
 * checks that exactly one record with those three integer values is produced.
 */
@Test
public void testWrite() throws Exception {
    File tempFile = File.createTempFile("datavec", "writer");
    tempFile.deleteOnExit();
    FileSplit fileSplit = new FileSplit(tempFile);

    List<Writable> collection = new ArrayList<>();
    collection.add(new Text("12"));
    collection.add(new Text("13"));
    collection.add(new Text("14"));

    CSVRecordWriter writer = new CSVRecordWriter();
    try {
        writer.initialize(fileSplit, new NumberOfRecordsPartitioner());
        writer.write(collection);
    } finally {
        // Release the writer before reading the file back, so the underlying handle is not
        // leaked and any buffered output reaches disk.
        // NOTE(review): relies on RecordWriter exposing close() (Closeable) - confirm.
        writer.close();
    }

    CSVRecordReader reader = new CSVRecordReader(0);
    reader.initialize(new FileSplit(tempFile));
    int cnt = 0;
    while (reader.hasNext()) {
        List<Writable> line = new ArrayList<>(reader.next());
        assertEquals(3, line.size());

        assertEquals(12, line.get(0).toInt());
        assertEquals(13, line.get(1).toInt());
        assertEquals(14, line.get(2).toInt());
        cnt++;
    }
    assertEquals(1, cnt);
}
 
Example 19
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Reads "iris.dat" record by record via {@code nextRecord()}, collecting each record's
 * metadata and values. After a reset it reloads records through {@code loadFromMetaData}:
 * once with the full metadata list (150 expected), and once with a hand-picked,
 * out-of-order subset whose values must match the originals at the same indices.
 */
@Test
public void testMeta() throws Exception {
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    List<RecordMetaData> metaList = new ArrayList<>();
    List<List<Writable>> writables = new ArrayList<>();
    int lineCount = 0;
    while (rr.hasNext()) {
        Record rec = rr.nextRecord();
        assertEquals(5, rec.getRecord().size());
        lineCount++;

        metaList.add(rec.getMetaData());
        writables.add(rec.getRecord());
    }
    assertFalse(rr.hasNext());
    assertEquals(150, lineCount);
    rr.reset();


    System.out.println("\n\n\n--------------------------------");
    List<Record> contents = rr.loadFromMetaData(metaList);
    assertEquals(150, contents.size());

    // Request a descending subset to show results follow the order of the metadata given.
    int[] picks = {100, 90, 80, 70, 60};
    List<RecordMetaData> meta2 = new ArrayList<>();
    for (int idx : picks) {
        meta2.add(metaList.get(idx));
    }

    List<Record> contents2 = rr.loadFromMetaData(meta2);
    for (int k = 0; k < picks.length; k++) {
        assertEquals(writables.get(picks[k]), contents2.get(k).getRecord());
    }
}
 
Example 20
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Reads "datavec-api/iris.dat" via {@code nextRecord()}, keeping each record's metadata
 * and values. After a reset it checks that {@code loadFromMetaData} returns 150 records
 * for the full metadata list, and that a descending five-element subset returns records
 * equal to the originals at those indices, in the requested order.
 */
@Test
public void testMeta() throws Exception {
    FileSplit irisSplit = new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile());
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(irisSplit);

    List<RecordMetaData> metaList = new ArrayList<>();
    List<List<Writable>> writables = new ArrayList<>();
    int lineCount = 0;
    while (rr.hasNext()) {
        Record rec = rr.nextRecord();
        assertEquals(5, rec.getRecord().size());
        lineCount++;

        metaList.add(rec.getMetaData());
        writables.add(rec.getRecord());
    }
    assertFalse(rr.hasNext());
    assertEquals(150, lineCount);
    rr.reset();


    System.out.println("\n\n\n--------------------------------");
    List<Record> contents = rr.loadFromMetaData(metaList);
    assertEquals(150, contents.size());

    // Indices are deliberately out of file order.
    int[] picks = {100, 90, 80, 70, 60};
    List<RecordMetaData> meta2 = new ArrayList<>();
    for (int idx : picks) {
        meta2.add(metaList.get(idx));
    }

    List<Record> contents2 = rr.loadFromMetaData(meta2);
    for (int k = 0; k < picks.length; k++) {
        assertEquals(writables.get(picks[k]), contents2.get(k).getRecord());
    }
}