Java Code Examples for org.datavec.api.records.reader.RecordReader#next()

The following examples show how to use org.datavec.api.records.reader.RecordReader#next(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: ExcelRecordReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void testSimple() throws Exception {
    // Reader with no header row: the first sheet row is already data.
    RecordReader excel = new ExcelRecordReader();
    excel.initialize(new FileSplit(new ClassPathResource("testsheet.xlsx").getFile()));
    assertTrue(excel.hasNext());
    List<Writable> next = excel.next();
    assertEquals(3, next.size());

    // Reader configured to skip one header row.
    RecordReader headerReader = new ExcelRecordReader(1);
    headerReader.initialize(new FileSplit(new ClassPathResource("testsheetheader.xlsx").getFile()));
    // BUG FIX: the original asserted on `excel` below, so `headerReader`
    // was constructed but never actually exercised by the test.
    assertTrue(headerReader.hasNext());
    List<Writable> next2 = headerReader.next();
    assertEquals(3, next2.size());
}
 
Example 2
Source File: TestConcatenatingRecordReader.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void test() throws Exception {
    // Two readers over the same 150-row iris file; concatenated, they
    // must yield exactly 300 records.
    CSVRecordReader first = new CSVRecordReader(0, ',');
    first.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));

    CSVRecordReader second = new CSVRecordReader(0, ',');
    second.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));

    RecordReader concatenated = new ConcatenatingRecordReader(first, second);

    int total = 0;
    for (; concatenated.hasNext(); total++) {
        concatenated.next();
    }

    assertEquals(300, total);
}
 
Example 3
Source File: JacksonRecordReaderTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
private static void testJacksonRecordReader(RecordReader rr) {
    // Expected contents: three records, with sentinel values
    // ("MISSING_B" / "MISSING_CX") substituted for absent fields.
    List<List<Writable>> expected = Arrays.asList(
            Arrays.asList((Writable) new Text("aValue0"), new Text("bValue0"), new Text("cxValue0")),
            Arrays.asList((Writable) new Text("aValue1"), new Text("MISSING_B"), new Text("cxValue1")),
            Arrays.asList((Writable) new Text("aValue2"), new Text("bValue2"), new Text("MISSING_CX")));

    for (List<Writable> exp : expected) {
        assertEquals(exp, rr.next());
    }
    assertFalse(rr.hasNext());

    // After reset() the reader must replay the same records in the same order.
    rr.reset();
    for (List<Writable> exp : expected) {
        assertEquals(exp, rr.next());
    }
    assertFalse(rr.hasNext());
}
 
Example 4
Source File: ArrowConverterTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testRecordReaderAndWriteFile() throws Exception {
    // Round-trip a record batch through an in-memory byte array and
    // verify the deserialized result equals what was written.
    val toWrite = recordToWrite();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    ArrowConverter.writeRecordBatchTo(toWrite.getRight(), toWrite.getFirst(), out);
    val roundTripped = ArrowConverter.readFromBytes(out.toByteArray());
    assertEquals(toWrite, roundTripped);

    // Now read the same records back from a temp file via an ArrowRecordReader.
    File tmp = tmpDataFile(toWrite);
    RecordReader reader = new ArrowRecordReader();
    reader.initialize(new FileSplit(tmp));

    List<Writable> firstRecord = reader.next();
    assertEquals(2, firstRecord.size());
}
 
Example 5
Source File: ExcelRecordReaderTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testSimple() throws Exception {
    // Reader with no header row: the first sheet row is already data.
    RecordReader excel = new ExcelRecordReader();
    excel.initialize(new FileSplit(new ClassPathResource("datavec-excel/testsheet.xlsx").getFile()));
    assertTrue(excel.hasNext());
    List<Writable> next = excel.next();
    assertEquals(3, next.size());

    // Reader configured to skip one header row.
    RecordReader headerReader = new ExcelRecordReader(1);
    headerReader.initialize(new FileSplit(new ClassPathResource("datavec-excel/testsheetheader.xlsx").getFile()));
    // BUG FIX: the original asserted on `excel` below, so `headerReader`
    // was constructed but never actually exercised by the test.
    assertTrue(headerReader.hasNext());
    List<Writable> next2 = headerReader.next();
    assertEquals(3, next2.size());
}
 
Example 6
Source File: TestSerialization.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void testCsvRRSerializationResults() throws Exception {
    int skipLines = 3;
    RecordReader r1 = new CSVRecordReader(skipLines, '\t');

    // FIX: close the ObjectOutputStream before grabbing the bytes.
    // ObjectOutputStream buffers internally, so calling toByteArray() on an
    // unflushed stream can yield a truncated serialization. try-with-resources
    // also guarantees the streams are released on failure.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (ObjectOutputStream os = new ObjectOutputStream(baos)) {
        os.writeObject(r1);
    }
    byte[] bytes = baos.toByteArray();

    RecordReader r2;
    try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
        r2 = (RecordReader) ois.readObject();
    }

    File f = new ClassPathResource("iris_tab_delim.txt").getFile();
    r1.initialize(new FileSplit(f));
    r2.initialize(new FileSplit(f));

    // The deserialized reader must produce records identical to the original's.
    int count = 0;
    while (r1.hasNext()) {
        assertEquals(r1.next(), r2.next());
        count++;
    }
    assertEquals(150 - skipLines, count);
}
 
Example 7
Source File: TestConcatenatingRecordReader.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void test() throws Exception {
    // Concatenating two readers over the same 150-record iris file
    // must yield 300 records in total.
    CSVRecordReader left = new CSVRecordReader(0, ',');
    left.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    CSVRecordReader right = new CSVRecordReader(0, ',');
    right.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    RecordReader combined = new ConcatenatingRecordReader(left, right);

    int seen = 0;
    while (combined.hasNext()) {
        combined.next();
        seen++;
    }

    assertEquals(300, seen);
}
 
Example 8
Source File: JacksonRecordReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
private static void testJacksonRecordReader(RecordReader rr) {
    // The reader must yield exactly three records, with sentinel values
    // ("MISSING_B" / "MISSING_CX") standing in for absent fields.
    List<Writable> expected0 = Arrays.asList((Writable) new Text("aValue0"), new Text("bValue0"), new Text("cxValue0"));
    List<Writable> expected1 = Arrays.asList((Writable) new Text("aValue1"), new Text("MISSING_B"), new Text("cxValue1"));
    List<Writable> expected2 = Arrays.asList((Writable) new Text("aValue2"), new Text("bValue2"), new Text("MISSING_CX"));

    assertEquals(expected0, rr.next());
    assertEquals(expected1, rr.next());
    assertEquals(expected2, rr.next());
    assertFalse(rr.hasNext());

    // reset() must rewind the reader and replay the identical records.
    rr.reset();
    assertEquals(expected0, rr.next());
    assertEquals(expected1, rr.next());
    assertEquals(expected2, rr.next());
    assertFalse(rr.hasNext());
}
 
Example 9
Source File: ArrowConverterTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void testRecordReaderAndWriteFile() throws Exception {
    // Serialize a record batch to bytes and verify it reads back equal.
    val records = recordToWrite();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ArrowConverter.writeRecordBatchTo(records.getRight(), records.getFirst(), baos);
    byte[] serialized = baos.toByteArray();
    val deserialized = ArrowConverter.readFromBytes(serialized);
    assertEquals(records, deserialized);

    // Then read the same records from a temp file through an ArrowRecordReader.
    File dataFile = tmpDataFile(records);
    RecordReader arrowReader = new ArrowRecordReader();
    arrowReader.initialize(new FileSplit(dataFile));

    List<Writable> firstRecord = arrowReader.next();
    assertEquals(2, firstRecord.size());
}
 
Example 10
Source File: TestSerialization.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testCsvRRSerializationResults() throws Exception {
    int skipLines = 3;
    RecordReader r1 = new CSVRecordReader(skipLines, '\t');

    // FIX: close the ObjectOutputStream before grabbing the bytes.
    // ObjectOutputStream buffers internally, so calling toByteArray() on an
    // unflushed stream can yield a truncated serialization. try-with-resources
    // also guarantees the streams are released on failure.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (ObjectOutputStream os = new ObjectOutputStream(baos)) {
        os.writeObject(r1);
    }
    byte[] bytes = baos.toByteArray();

    RecordReader r2;
    try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
        r2 = (RecordReader) ois.readObject();
    }

    File f = new ClassPathResource("datavec-api/iris_tab_delim.txt").getFile();
    r1.initialize(new FileSplit(f));
    r2.initialize(new FileSplit(f));

    // The deserialized reader must produce records identical to the original's.
    int count = 0;
    while (r1.hasNext()) {
        assertEquals(r1.next(), r2.next());
        count++;
    }
    assertEquals(150 - skipLines, count);
}
 
Example 11
Source File: JacksonLineRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
private static void testJacksonRecordReader(RecordReader rr) {
    // FIX: use JUnit assertions instead of the `assert` keyword - the latter
    // is a no-op unless the JVM runs with -ea, so the original checked nothing
    // in a default test run. Also count records so an empty reader cannot
    // pass vacuously.
    int count = 0;
    while (rr.hasNext()) {
        List<Writable> record = rr.next();
        assertFalse(record.isEmpty());
        count++;
    }
    assertTrue(count > 0);
}
 
Example 12
Source File: JacksonLineRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
private static void testJacksonRecordReader(RecordReader rr) {
    // FIX: use JUnit assertions instead of the `assert` keyword - the latter
    // is a no-op unless the JVM runs with -ea, so the original checked nothing
    // in a default test run. Also count records so an empty reader cannot
    // pass vacuously.
    int count = 0;
    while (rr.hasNext()) {
        List<Writable> record = rr.next();
        assertFalse(record.isEmpty());
        count++;
    }
    assertTrue(count > 0);
}
 
Example 13
Source File: LineReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
@Test
public void testLineReader() throws Exception {
    String tempDir = System.getProperty("java.io.tmpdir");
    File tmpdir = new File(tempDir, "tmpdir-testLineReader");
    // FIX: File.delete() is a silent no-op on a non-empty directory, so a
    // leftover run would poison this one - remove it recursively instead.
    if (tmpdir.exists())
        FileUtils.deleteDirectory(tmpdir);
    tmpdir.mkdir();

    try {
        File tmp1 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp1.txt"));
        File tmp2 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp2.txt"));
        File tmp3 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp3.txt"));

        FileUtils.writeLines(tmp1, Arrays.asList("1", "2", "3"));
        FileUtils.writeLines(tmp2, Arrays.asList("4", "5", "6"));
        FileUtils.writeLines(tmp3, Arrays.asList("7", "8", "9"));

        InputSplit split = new FileSplit(tmpdir);

        RecordReader reader = new LineRecordReader();
        reader.initialize(split);

        // Every line becomes a single-writable record; 3 files x 3 lines = 9.
        int count = 0;
        List<List<Writable>> list = new ArrayList<>();
        while (reader.hasNext()) {
            List<Writable> l = reader.next();
            assertEquals(1, l.size());
            list.add(l);
            count++;
        }

        assertEquals(9, count);
    } finally {
        // FIX: run cleanup even when an assertion above fails, and do not
        // swallow a cleanup failure with printStackTrace().
        FileUtils.deleteDirectory(tmpdir);
    }
}
 
Example 14
Source File: TransformProcess.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Infer the categories for the given record reader for
 * a particular set of columns (this is more efficient than
 * {@link #inferCategories(RecordReader, int)}
 * if you have more than one column you plan on inferring categories for)
 *
 * Note that each "column index" is a column in the context of:
 * {@code List<Writable> record = ...; record.get(columnIndex);}
 *
 *  Note that anything passed in as a column will be automatically converted to a
 *  string for categorical purposes. Results may vary depending on what's passed in.
 *  The *expected* input is strings or numbers (which have sensible toString() representations)
 *
 * Note that the returned categories will be sorted alphabetically, for each column
 *
 * @param recordReader the record reader to scan
 * @param columnIndices the column indices to get categories for
 * @return the inferred categories, keyed by column index
 */
public static Map<Integer,List<String>> inferCategories(RecordReader recordReader,int[] columnIndices) {
    if(columnIndices == null || columnIndices.length < 1) {
        return Collections.emptyMap();
    }

    // Accumulate the distinct values seen per requested column.
    Map<Integer,Set<String>> categories = new HashMap<>();
    for(int i = 0; i < columnIndices.length; i++) {
        categories.put(columnIndices[i], new HashSet<String>());
    }

    while(recordReader.hasNext()) {
        List<Writable> next = recordReader.next();
        for(int i = 0; i < columnIndices.length; i++) {
            if(columnIndices[i] >= next.size()) {
                // Only this column is skipped for the record; the remaining
                // requested columns are still processed. (The old message
                // claimed the whole example was filtered out, which was wrong.)
                log.warn("Skipping column " + columnIndices[i] + ": out of range for record of length " + next.size());
                continue;
            }
            categories.get(columnIndices[i]).add(next.get(columnIndices[i]).toString());
        }
    }

    // Copy each set into a sorted list - HashSet and RecordReader orders are
    // not deterministic in general.
    Map<Integer,List<String>> categoryMap = new HashMap<>();
    for(int i = 0; i < columnIndices.length; i++) {
        List<String> sorted = new ArrayList<>(categories.get(columnIndices[i]));
        Collections.sort(sorted);
        categoryMap.put(columnIndices[i], sorted);
    }

    return categoryMap;
}
 
Example 15
Source File: LineReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Test
public void testLineReader() throws Exception {
    // FIX: testDir.newFolder() already creates a fresh, empty directory, so
    // the old exists()/delete()/mkdir() sequence was dead code (and
    // File.delete() is a silent no-op on a non-empty directory anyway).
    File tmpdir = testDir.newFolder();

    File tmp1 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp1.txt"));
    File tmp2 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp2.txt"));
    File tmp3 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp3.txt"));

    FileUtils.writeLines(tmp1, Arrays.asList("1", "2", "3"));
    FileUtils.writeLines(tmp2, Arrays.asList("4", "5", "6"));
    FileUtils.writeLines(tmp3, Arrays.asList("7", "8", "9"));

    InputSplit split = new FileSplit(tmpdir);

    RecordReader reader = new LineRecordReader();
    reader.initialize(split);

    // Every line becomes a single-writable record; 3 files x 3 lines = 9.
    int count = 0;
    List<List<Writable>> list = new ArrayList<>();
    while (reader.hasNext()) {
        List<Writable> l = reader.next();
        assertEquals(1, l.size());
        list.add(l);
        count++;
    }

    assertEquals(9, count);
}
 
Example 16
Source File: TransformProcess.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Infer the categories for the given record reader for
 * a particular set of columns (this is more efficient than
 * {@link #inferCategories(RecordReader, int)}
 * if you have more than one column you plan on inferring categories for)
 *
 * Note that each "column index" is a column in the context of:
 * {@code List<Writable> record = ...; record.get(columnIndex);}
 *
 *  Note that anything passed in as a column will be automatically converted to a
 *  string for categorical purposes. Results may vary depending on what's passed in.
 *  The *expected* input is strings or numbers (which have sensible toString() representations)
 *
 * Note that the returned categories will be sorted alphabetically, for each column
 *
 * @param recordReader the record reader to scan
 * @param columnIndices the column indices to get categories for
 * @return the inferred categories, keyed by column index
 */
public static Map<Integer,List<String>> inferCategories(RecordReader recordReader,int[] columnIndices) {
    if(columnIndices == null || columnIndices.length < 1) {
        return Collections.emptyMap();
    }

    // Accumulate the distinct values seen per requested column.
    Map<Integer,Set<String>> categories = new HashMap<>();
    for(int i = 0; i < columnIndices.length; i++) {
        categories.put(columnIndices[i], new HashSet<String>());
    }

    while(recordReader.hasNext()) {
        List<Writable> next = recordReader.next();
        for(int i = 0; i < columnIndices.length; i++) {
            if(columnIndices[i] >= next.size()) {
                // Only this column is skipped for the record; the remaining
                // requested columns are still processed. (The old message
                // claimed the whole example was filtered out, which was wrong.)
                log.warn("Skipping column " + columnIndices[i] + ": out of range for record of length " + next.size());
                continue;
            }
            categories.get(columnIndices[i]).add(next.get(columnIndices[i]).toString());
        }
    }

    // Copy each set into a sorted list - HashSet and RecordReader orders are
    // not deterministic in general.
    Map<Integer,List<String>> categoryMap = new HashMap<>();
    for(int i = 0; i < columnIndices.length; i++) {
        List<String> sorted = new ArrayList<>(categories.get(columnIndices[i]));
        Collections.sort(sorted);
        categoryMap.put(columnIndices[i], sorted);
    }

    return categoryMap;
}
 
Example 17
Source File: ConcatenatingRecordReader.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Override
public List<Writable> next() {
    // Pull the next record from the first underlying reader that still has one.
    List<Writable> out = null;
    for( RecordReader rr : readers){
        if(rr.hasNext()){
            out = rr.next();
            break;
        }
    }
    if (out == null) {
        // FIX: the original returned null (and invoked listeners with null)
        // when every underlying reader was exhausted - a latent NPE for
        // callers. Fail fast per the standard iterator-style next() contract.
        throw new java.util.NoSuchElementException("No next element: all underlying readers are exhausted");
    }
    invokeListeners(out);
    return out;
}
 
Example 18
Source File: VasttextDataIterator.java    From scava with Eclipse Public License 2.0 5 votes vote down vote up
@Override
public MultiDataSet next(int num) {
    if (!hasNext())
        throw new NoSuchElementException("No next elements");

    // Pull up to `num` records from each registered RecordReader, keyed by
    // the reader's name, optionally collecting per-record metadata.
    Map<String, List<List<Writable>>> readerValues = new HashMap<>();
    List<RecordMetaDataComposableMap> nextMetas =
            collectMetaData ? new ArrayList<RecordMetaDataComposableMap>() : null;

    for (Map.Entry<String, RecordReader> entry : recordReaders.entrySet()) {
        RecordReader reader = entry.getValue();
        // Min: guard against a batch size far larger than the data set.
        List<List<Writable>> batch = new ArrayList<>(Math.min(num, 100000));

        for (int i = 0; i < num && reader.hasNext(); i++) {
            List<Writable> record;
            if (collectMetaData) {
                Record r = reader.nextRecord();
                record = r.getRecord();
                // Lazily grow the metadata list so index i is always valid.
                if (nextMetas.size() <= i) {
                    nextMetas.add(new RecordMetaDataComposableMap(new HashMap<String, RecordMetaData>()));
                }
                nextMetas.get(i).getMeta().put(entry.getKey(), r.getMetaData());
            } else {
                record = reader.next();
            }
            batch.add(record);
        }

        readerValues.put(entry.getKey(), batch);
    }

    return nextMultiDataSet(readerValues, nextMetas);
}
 
Example 19
Source File: TransformProcess.java    From DataVec with Apache License 2.0 3 votes vote down vote up
/**
 * Infer the categories for the given record reader for a particular column.
 * Note that each "column index" is a column in the context of:
 * {@code List<Writable> record = ...; record.get(columnIndex);}
 *
 *  Note that anything passed in as a column will be automatically converted to a
 *  string for categorical purposes.
 *
 *  The *expected* input is strings or numbers (which have sensible toString() representations)
 *
 *  Note that the returned categories will be sorted alphabetically
 *
 * @param recordReader the record reader to iterate through
 * @param columnIndex the column index to get categories for
 * @return the distinct category values for the column, sorted alphabetically
 */
public static List<String> inferCategories(RecordReader recordReader,int columnIndex) {
    Set<String> categories = new HashSet<>();
    while(recordReader.hasNext()) {
        List<Writable> next = recordReader.next();
        categories.add(next.get(columnIndex).toString());
    }

    //Sort categories alphabetically - HashSet and RecordReader orders are not deterministic in general
    List<String> ret = new ArrayList<>(categories);
    Collections.sort(ret);
    return ret;
}
 
Example 20
Source File: TransformProcess.java    From deeplearning4j with Apache License 2.0 3 votes vote down vote up
/**
 * Infer the categories for the given record reader for a particular column.
 * Note that each "column index" is a column in the context of:
 * {@code List<Writable> record = ...; record.get(columnIndex);}
 *
 *  Note that anything passed in as a column will be automatically converted to a
 *  string for categorical purposes.
 *
 *  The *expected* input is strings or numbers (which have sensible toString() representations)
 *
 *  Note that the returned categories will be sorted alphabetically
 *
 * @param recordReader the record reader to iterate through
 * @param columnIndex the column index to get categories for
 * @return the distinct category values for the column, sorted alphabetically
 */
public static List<String> inferCategories(RecordReader recordReader,int columnIndex) {
    Set<String> categories = new HashSet<>();
    while(recordReader.hasNext()) {
        List<Writable> next = recordReader.next();
        categories.add(next.get(columnIndex).toString());
    }

    //Sort categories alphabetically - HashSet and RecordReader orders are not deterministic in general
    List<String> ret = new ArrayList<>(categories);
    Collections.sort(ret);
    return ret;
}