Java Code Examples for org.datavec.api.records.reader.RecordReader#next()

The following examples show how to use org.datavec.api.records.reader.RecordReader#next(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: ExcelRecordReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void testSimple() throws Exception {
    // Reader with no header row: the first sheet row is already data.
    RecordReader excel = new ExcelRecordReader();
    excel.initialize(new FileSplit(new ClassPathResource("testsheet.xlsx").getFile()));
    assertTrue(excel.hasNext());
    List<Writable> next = excel.next();
    assertEquals(3, next.size());

    // Reader configured to skip one header row.
    RecordReader headerReader = new ExcelRecordReader(1);
    headerReader.initialize(new FileSplit(new ClassPathResource("testsheetheader.xlsx").getFile()));
    // BUG FIX: the original asserted on `excel` below, so `headerReader`
    // was constructed but never actually exercised by the test.
    assertTrue(headerReader.hasNext());
    List<Writable> next2 = headerReader.next();
    assertEquals(3, next2.size());
}
 
Example 2
Source File: TestConcatenatingRecordReader.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void test() throws Exception {
    // Two readers over the same 150-row iris file; concatenated, they
    // must yield exactly 300 records.
    CSVRecordReader first = new CSVRecordReader(0, ',');
    first.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));

    CSVRecordReader second = new CSVRecordReader(0, ',');
    second.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));

    RecordReader concatenated = new ConcatenatingRecordReader(first, second);

    int total = 0;
    for (; concatenated.hasNext(); total++) {
        concatenated.next();
    }

    assertEquals(300, total);
}
 
Example 3
Source File: JacksonRecordReaderTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
private static void testJacksonRecordReader(RecordReader rr) {
    // Expected contents: three records, with sentinel values
    // ("MISSING_B" / "MISSING_CX") substituted for absent fields.
    List<List<Writable>> expected = Arrays.asList(
            Arrays.asList((Writable) new Text("aValue0"), new Text("bValue0"), new Text("cxValue0")),
            Arrays.asList((Writable) new Text("aValue1"), new Text("MISSING_B"), new Text("cxValue1")),
            Arrays.asList((Writable) new Text("aValue2"), new Text("bValue2"), new Text("MISSING_CX")));

    for (List<Writable> exp : expected) {
        assertEquals(exp, rr.next());
    }
    assertFalse(rr.hasNext());

    // After reset() the reader must replay the same records in the same order.
    rr.reset();
    for (List<Writable> exp : expected) {
        assertEquals(exp, rr.next());
    }
    assertFalse(rr.hasNext());
}
 
Example 4
Source File: ArrowConverterTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testRecordReaderAndWriteFile() throws Exception {
    // Round-trip a record batch through an in-memory byte array and
    // verify the deserialized result equals what was written.
    val toWrite = recordToWrite();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    ArrowConverter.writeRecordBatchTo(toWrite.getRight(), toWrite.getFirst(), out);
    val roundTripped = ArrowConverter.readFromBytes(out.toByteArray());
    assertEquals(toWrite, roundTripped);

    // Now read the same records back from a temp file via an ArrowRecordReader.
    File tmp = tmpDataFile(toWrite);
    RecordReader reader = new ArrowRecordReader();
    reader.initialize(new FileSplit(tmp));

    List<Writable> firstRecord = reader.next();
    assertEquals(2, firstRecord.size());
}
 
Example 5
Source File: ExcelRecordReaderTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testSimple() throws Exception {
    // Reader with no header row: the first sheet row is already data.
    RecordReader excel = new ExcelRecordReader();
    excel.initialize(new FileSplit(new ClassPathResource("datavec-excel/testsheet.xlsx").getFile()));
    assertTrue(excel.hasNext());
    List<Writable> next = excel.next();
    assertEquals(3, next.size());

    // Reader configured to skip one header row.
    RecordReader headerReader = new ExcelRecordReader(1);
    headerReader.initialize(new FileSplit(new ClassPathResource("datavec-excel/testsheetheader.xlsx").getFile()));
    // BUG FIX: the original asserted on `excel` below, so `headerReader`
    // was constructed but never actually exercised by the test.
    assertTrue(headerReader.hasNext());
    List<Writable> next2 = headerReader.next();
    assertEquals(3, next2.size());
}
 
Example 6
Source File: TestSerialization.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void testCsvRRSerializationResults() throws Exception {
    int skipLines = 3;
    RecordReader r1 = new CSVRecordReader(skipLines, '\t');

    // FIX: close the ObjectOutputStream before grabbing the bytes.
    // ObjectOutputStream buffers internally, so calling toByteArray() on an
    // unflushed stream can yield a truncated serialization. try-with-resources
    // also guarantees the streams are released on failure.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (ObjectOutputStream os = new ObjectOutputStream(baos)) {
        os.writeObject(r1);
    }
    byte[] bytes = baos.toByteArray();

    RecordReader r2;
    try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
        r2 = (RecordReader) ois.readObject();
    }

    File f = new ClassPathResource("iris_tab_delim.txt").getFile();
    r1.initialize(new FileSplit(f));
    r2.initialize(new FileSplit(f));

    // The deserialized reader must produce records identical to the original's.
    int count = 0;
    while (r1.hasNext()) {
        assertEquals(r1.next(), r2.next());
        count++;
    }
    assertEquals(150 - skipLines, count);
}
 
Example 7
Source File: TestConcatenatingRecordReader.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void test() throws Exception {
    // Concatenating two readers over the same 150-record iris file
    // must yield 300 records in total.
    CSVRecordReader left = new CSVRecordReader(0, ',');
    left.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    CSVRecordReader right = new CSVRecordReader(0, ',');
    right.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    RecordReader combined = new ConcatenatingRecordReader(left, right);

    int seen = 0;
    while (combined.hasNext()) {
        combined.next();
        seen++;
    }

    assertEquals(300, seen);
}
 
Example 8
Source File: JacksonRecordReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
private static void testJacksonRecordReader(RecordReader rr) {
    // The reader must yield exactly three records, with sentinel values
    // ("MISSING_B" / "MISSING_CX") standing in for absent fields.
    List<Writable> expected0 = Arrays.asList((Writable) new Text("aValue0"), new Text("bValue0"), new Text("cxValue0"));
    List<Writable> expected1 = Arrays.asList((Writable) new Text("aValue1"), new Text("MISSING_B"), new Text("cxValue1"));
    List<Writable> expected2 = Arrays.asList((Writable) new Text("aValue2"), new Text("bValue2"), new Text("MISSING_CX"));

    assertEquals(expected0, rr.next());
    assertEquals(expected1, rr.next());
    assertEquals(expected2, rr.next());
    assertFalse(rr.hasNext());

    // reset() must rewind the reader and replay the identical records.
    rr.reset();
    assertEquals(expected0, rr.next());
    assertEquals(expected1, rr.next());
    assertEquals(expected2, rr.next());
    assertFalse(rr.hasNext());
}
 
Example 9
Source File: ArrowConverterTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void testRecordReaderAndWriteFile() throws Exception {
    // Serialize a record batch to bytes and verify it reads back equal.
    val records = recordToWrite();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ArrowConverter.writeRecordBatchTo(records.getRight(), records.getFirst(), baos);
    byte[] serialized = baos.toByteArray();
    val deserialized = ArrowConverter.readFromBytes(serialized);
    assertEquals(records, deserialized);

    // Then read the same records from a temp file through an ArrowRecordReader.
    File dataFile = tmpDataFile(records);
    RecordReader arrowReader = new ArrowRecordReader();
    arrowReader.initialize(new FileSplit(dataFile));

    List<Writable> firstRecord = arrowReader.next();
    assertEquals(2, firstRecord.size());
}
 
Example 10
Source File: TestSerialization.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testCsvRRSerializationResults() throws Exception {
    int skipLines = 3;
    RecordReader r1 = new CSVRecordReader(skipLines, '\t');

    // FIX: close the ObjectOutputStream before grabbing the bytes.
    // ObjectOutputStream buffers internally, so calling toByteArray() on an
    // unflushed stream can yield a truncated serialization. try-with-resources
    // also guarantees the streams are released on failure.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (ObjectOutputStream os = new ObjectOutputStream(baos)) {
        os.writeObject(r1);
    }
    byte[] bytes = baos.toByteArray();

    RecordReader r2;
    try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
        r2 = (RecordReader) ois.readObject();
    }

    File f = new ClassPathResource("datavec-api/iris_tab_delim.txt").getFile();
    r1.initialize(new FileSplit(f));
    r2.initialize(new FileSplit(f));

    // The deserialized reader must produce records identical to the original's.
    int count = 0;
    while (r1.hasNext()) {
        assertEquals(r1.next(), r2.next());
        count++;
    }
    assertEquals(150 - skipLines, count);
}
 
Example 11
Source File: JacksonLineRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
private static void testJacksonRecordReader(RecordReader rr) {
    // FIX: use JUnit assertions instead of the `assert` keyword - the latter
    // is a no-op unless the JVM runs with -ea, so the original checked nothing
    // in a default test run. Also count records so an empty reader cannot
    // pass vacuously.
    int count = 0;
    while (rr.hasNext()) {
        List<Writable> record = rr.next();
        assertFalse(record.isEmpty());
        count++;
    }
    assertTrue(count > 0);
}
 
Example 12
Source File: JacksonLineRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
private static void testJacksonRecordReader(RecordReader rr) {
    // FIX: use JUnit assertions instead of the `assert` keyword - the latter
    // is a no-op unless the JVM runs with -ea, so the original checked nothing
    // in a default test run. Also count records so an empty reader cannot
    // pass vacuously.
    int count = 0;
    while (rr.hasNext()) {
        List<Writable> record = rr.next();
        assertFalse(record.isEmpty());
        count++;
    }
    assertTrue(count > 0);
}
 
Example 13
Source File: LineReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
@Test
public void testLineReader() throws Exception {
    String tempDir = System.getProperty("java.io.tmpdir");
    File tmpdir = new File(tempDir, "tmpdir-testLineReader");
    // FIX: File.delete() is a silent no-op on a non-empty directory, so a
    // leftover run would poison this one - remove it recursively instead.
    if (tmpdir.exists())
        FileUtils.deleteDirectory(tmpdir);
    tmpdir.mkdir();

    try {
        File tmp1 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp1.txt"));
        File tmp2 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp2.txt"));
        File tmp3 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp3.txt"));

        FileUtils.writeLines(tmp1, Arrays.asList("1", "2", "3"));
        FileUtils.writeLines(tmp2, Arrays.asList("4", "5", "6"));
        FileUtils.writeLines(tmp3, Arrays.asList("7", "8", "9"));

        InputSplit split = new FileSplit(tmpdir);

        RecordReader reader = new LineRecordReader();
        reader.initialize(split);

        // Every line becomes a single-writable record; 3 files x 3 lines = 9.
        int count = 0;
        List<List<Writable>> list = new ArrayList<>();
        while (reader.hasNext()) {
            List<Writable> l = reader.next();
            assertEquals(1, l.size());
            list.add(l);
            count++;
        }

        assertEquals(9, count);
    } finally {
        // FIX: run cleanup even when an assertion above fails, and do not
        // swallow a cleanup failure with printStackTrace().
        FileUtils.deleteDirectory(tmpdir);
    }
}
 
Example 14
Source File: TransformProcess.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Infer the categories for the given record reader for
 * a particular set of columns (this is more efficient than
 * {@link #inferCategories(RecordReader, int)}
 * if you have more than one column you plan on inferring categories for)
 *
 * Note that each "column index" is a column in the context of:
 * {@code List<Writable> record = ...; record.get(columnIndex);}
 *
 *  Note that anything passed in as a column will be automatically converted to a
 *  string for categorical purposes. Results may vary depending on what's passed in.
 *  The *expected* input is strings or numbers (which have sensible toString() representations)
 *
 * Note that the returned categories will be sorted alphabetically, for each column
 *
 * @param recordReader the record reader to scan
 * @param columnIndices the column indices to get categories for
 * @return the inferred categories, keyed by column index
 */
public static Map<Integer,List<String>> inferCategories(RecordReader recordReader,int[] columnIndices) {
    if(columnIndices == null || columnIndices.length < 1) {
        return Collections.emptyMap();
    }

    // Accumulate the distinct values seen per requested column.
    Map<Integer,Set<String>> categories = new HashMap<>();
    for(int i = 0; i < columnIndices.length; i++) {
        categories.put(columnIndices[i], new HashSet<String>());
    }

    while(recordReader.hasNext()) {
        List<Writable> next = recordReader.next();
        for(int i = 0; i < columnIndices.length; i++) {
            if(columnIndices[i] >= next.size()) {
                // Only this column is skipped for the record; the remaining
                // requested columns are still processed. (The old message
                // claimed the whole example was filtered out, which was wrong.)
                log.warn("Skipping column " + columnIndices[i] + ": out of range for record of length " + next.size());
                continue;
            }
            categories.get(columnIndices[i]).add(next.get(columnIndices[i]).toString());
        }
    }

    // Copy each set into a sorted list - HashSet and RecordReader orders are
    // not deterministic in general.
    Map<Integer,List<String>> categoryMap = new HashMap<>();
    for(int i = 0; i < columnIndices.length; i++) {
        List<String> sorted = new ArrayList<>(categories.get(columnIndices[i]));
        Collections.sort(sorted);
        categoryMap.put(columnIndices[i], sorted);
    }

    return categoryMap;
}
 
Example 15
Source File: LineReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Test
public void testLineReader() throws Exception {
    // FIX: testDir.newFolder() already creates a fresh, empty directory, so
    // the old exists()/delete()/mkdir() sequence was dead code (and
    // File.delete() is a silent no-op on a non-empty directory anyway).
    File tmpdir = testDir.newFolder();

    File tmp1 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp1.txt"));
    File tmp2 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp2.txt"));
    File tmp3 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp3.txt"));

    FileUtils.writeLines(tmp1, Arrays.asList("1", "2", "3"));
    FileUtils.writeLines(tmp2, Arrays.asList("4", "5", "6"));
    FileUtils.writeLines(tmp3, Arrays.asList("7", "8", "9"));

    InputSplit split = new FileSplit(tmpdir);

    RecordReader reader = new LineRecordReader();
    reader.initialize(split);

    // Every line becomes a single-writable record; 3 files x 3 lines = 9.
    int count = 0;
    List<List<Writable>> list = new ArrayList<>();
    while (reader.hasNext()) {
        List<Writable> l = reader.next();
        assertEquals(1, l.size());
        list.add(l);
        count++;
    }

    assertEquals(9, count);
}
 
Example 16
Source File: TransformProcess.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Infer the categories for the given record reader for
 * a particular set of columns (this is more efficient than
 * {@link #inferCategories(RecordReader, int)}
 * if you have more than one column you plan on inferring categories for)
 *
 * Note that each "column index" is a column in the context of:
 * {@code List<Writable> record = ...; record.get(columnIndex);}
 *
 *  Note that anything passed in as a column will be automatically converted to a
 *  string for categorical purposes. Results may vary depending on what's passed in.
 *  The *expected* input is strings or numbers (which have sensible toString() representations)
 *
 * Note that the returned categories will be sorted alphabetically, for each column
 *
 * @param recordReader the record reader to scan
 * @param columnIndices the column indices to get categories for
 * @return the inferred categories, keyed by column index
 */
public static Map<Integer,List<String>> inferCategories(RecordReader recordReader,int[] columnIndices) {
    if(columnIndices == null || columnIndices.length < 1) {
        return Collections.emptyMap();
    }

    // Accumulate the distinct values seen per requested column.
    Map<Integer,Set<String>> categories = new HashMap<>();
    for(int i = 0; i < columnIndices.length; i++) {
        categories.put(columnIndices[i], new HashSet<String>());
    }

    while(recordReader.hasNext()) {
        List<Writable> next = recordReader.next();
        for(int i = 0; i < columnIndices.length; i++) {
            if(columnIndices[i] >= next.size()) {
                // Only this column is skipped for the record; the remaining
                // requested columns are still processed. (The old message
                // claimed the whole example was filtered out, which was wrong.)
                log.warn("Skipping column " + columnIndices[i] + ": out of range for record of length " + next.size());
                continue;
            }
            categories.get(columnIndices[i]).add(next.get(columnIndices[i]).toString());
        }
    }

    // Copy each set into a sorted list - HashSet and RecordReader orders are
    // not deterministic in general.
    Map<Integer,List<String>> categoryMap = new HashMap<>();
    for(int i = 0; i < columnIndices.length; i++) {
        List<String> sorted = new ArrayList<>(categories.get(columnIndices[i]));
        Collections.sort(sorted);
        categoryMap.put(columnIndices[i], sorted);
    }

    return categoryMap;
}
 
Example 17
Source File: ConcatenatingRecordReader.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Override
public List<Writable> next() {
    // Pull the next record from the first underlying reader that still has one.
    List<Writable> out = null;
    for( RecordReader rr : readers){
        if(rr.hasNext()){
            out = rr.next();
            break;
        }
    }
    if (out == null) {
        // FIX: the original returned null (and invoked listeners with null)
        // when every underlying reader was exhausted - a latent NPE for
        // callers. Fail fast per the standard iterator-style next() contract.
        throw new java.util.NoSuchElementException("No next element: all underlying readers are exhausted");
    }
    invokeListeners(out);
    return out;
}
 
Example 18
Source File: VasttextDataIterator.java    From scava with Eclipse Public License 2.0 5 votes vote down vote up
@Override
public MultiDataSet next(int num) {
    if (!hasNext())
        throw new NoSuchElementException("No next elements");

    // Pull up to `num` records from each registered RecordReader, keyed by
    // the reader's name, optionally collecting per-record metadata.
    Map<String, List<List<Writable>>> readerValues = new HashMap<>();
    List<RecordMetaDataComposableMap> nextMetas =
            collectMetaData ? new ArrayList<RecordMetaDataComposableMap>() : null;

    for (Map.Entry<String, RecordReader> entry : recordReaders.entrySet()) {
        RecordReader reader = entry.getValue();
        // Min: guard against a batch size far larger than the data set.
        List<List<Writable>> batch = new ArrayList<>(Math.min(num, 100000));

        for (int i = 0; i < num && reader.hasNext(); i++) {
            List<Writable> record;
            if (collectMetaData) {
                Record r = reader.nextRecord();
                record = r.getRecord();
                // Lazily grow the metadata list so index i is always valid.
                if (nextMetas.size() <= i) {
                    nextMetas.add(new RecordMetaDataComposableMap(new HashMap<String, RecordMetaData>()));
                }
                nextMetas.get(i).getMeta().put(entry.getKey(), r.getMetaData());
            } else {
                record = reader.next();
            }
            batch.add(record);
        }

        readerValues.put(entry.getKey(), batch);
    }

    return nextMultiDataSet(readerValues, nextMetas);
}
 
Example 19
Source File: TransformProcess.java    From DataVec with Apache License 2.0 3 votes vote down vote up
/**
 * Infer the categories for the given record reader for a particular column.
 * Note that each "column index" is a column in the context of:
 * {@code List<Writable> record = ...; record.get(columnIndex);}
 *
 *  Note that anything passed in as a column will be automatically converted to a
 *  string for categorical purposes.
 *
 *  The *expected* input is strings or numbers (which have sensible toString() representations)
 *
 *  Note that the returned categories will be sorted alphabetically
 *
 * @param recordReader the record reader to iterate through
 * @param columnIndex the column index to get categories for
 * @return the distinct category values for the column, sorted alphabetically
 */
public static List<String> inferCategories(RecordReader recordReader,int columnIndex) {
    Set<String> categories = new HashSet<>();
    while(recordReader.hasNext()) {
        List<Writable> next = recordReader.next();
        categories.add(next.get(columnIndex).toString());
    }

    //Sort categories alphabetically - HashSet and RecordReader orders are not deterministic in general
    List<String> ret = new ArrayList<>(categories);
    Collections.sort(ret);
    return ret;
}
 
Example 20
Source File: TransformProcess.java    From deeplearning4j with Apache License 2.0 3 votes vote down vote up
/**
 * Infer the categories for the given record reader for a particular column.
 * Note that each "column index" is a column in the context of:
 * {@code List<Writable> record = ...; record.get(columnIndex);}
 *
 *  Note that anything passed in as a column will be automatically converted to a
 *  string for categorical purposes.
 *
 *  The *expected* input is strings or numbers (which have sensible toString() representations)
 *
 *  Note that the returned categories will be sorted alphabetically
 *
 * @param recordReader the record reader to iterate through
 * @param columnIndex the column index to get categories for
 * @return the distinct category values for the column, sorted alphabetically
 */
public static List<String> inferCategories(RecordReader recordReader,int columnIndex) {
    Set<String> categories = new HashSet<>();
    while(recordReader.hasNext()) {
        List<Writable> next = recordReader.next();
        categories.add(next.get(columnIndex).toString());
    }

    //Sort categories alphabetically - HashSet and RecordReader orders are not deterministic in general
    List<String> ret = new ArrayList<>(categories);
    Collections.sort(ret);
    return ret;
}