Java Code Examples for org.datavec.api.split.StringSplit

The following examples show how to use org.datavec.api.split.StringSplit. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Reports whether another element can be read, rolling over to the next
 * location once the currently open iterator is exhausted.
 *
 * <p>A roll-over is attempted only when {@code locations} is set, the input
 * split is not a {@code StringSplit}, and {@code splitIndex} has not reached
 * the last location. The roll-over bumps {@code splitIndex}, resets
 * {@code lineIndex}, closes the reader, opens a line iterator on the new
 * location and calls {@code onLocationOpen} for it.
 *
 * @return {@code true} if the current (or freshly opened) iterator has more
 *         elements, {@code false} otherwise
 */
@Override
public boolean hasNext() {
    // Fast path: the iterator that is already open still has data.
    if (iter != null && iter.hasNext()) {
        return true;
    }

    final boolean canAdvance = locations != null
            && !(inputSplit instanceof StringSplit)
            && splitIndex < locations.length - 1;
    if (!canAdvance) {
        return false;
    }

    splitIndex++;
    lineIndex = 0; // line numbering restarts with each new location
    try {
        close();
        iter = lineIterator(new InputStreamReader(locations[splitIndex].toURL().openStream()));
        onLocationOpen(locations[splitIndex]);
    } catch (IOException ioe) {
        // NOTE(review): the failure is only printed; the iter.hasNext() call
        // below then runs against whatever iter was left behind - confirm intended.
        ioe.printStackTrace();
    }

    return iter.hasNext();
}
 
Example 2
Source Project: DataVec   Source File: LineRecordReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tells the caller whether more lines are available, switching to the next
 * location when the current iterator has run dry.
 *
 * <p>No switch is attempted when {@code locations} is {@code null}, when the
 * input split is a {@code StringSplit}, or when {@code splitIndex} already
 * points at the last location; in those cases the result is {@code false}.
 *
 * @return {@code true} when the current or newly opened iterator reports
 *         another element
 */
@Override
public boolean hasNext() {
    if (iter != null && iter.hasNext()) {
        return true;
    }

    // Nothing further to open: no locations, in-memory string split, or last location reached.
    if (locations == null || inputSplit instanceof StringSplit || splitIndex >= locations.length - 1) {
        return false;
    }

    splitIndex++;
    lineIndex = 0; // a new split starts counting lines from zero
    try {
        close();
        iter = IOUtils.lineIterator(new InputStreamReader(locations[splitIndex].toURL().openStream()));
        onLocationOpen(locations[splitIndex]);
    } catch (IOException openFailure) {
        // NOTE(review): the error is printed and execution continues with the
        // previous value of iter - confirm this best-effort behaviour is wanted.
        openFailure.printStackTrace();
    }

    return iter.hasNext();
}
 
Example 3
Source Project: DataVec   Source File: LineRecordReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the line iterator matching the kind of input split in use.
 *
 * <ul>
 *   <li>{@code StringSplit}: a one-element iterator over the split's data.</li>
 *   <li>{@code InputStreamInputSplit}: a line iterator over the split's stream,
 *       if that stream is non-null.</li>
 *   <li>anything else: {@code locations} is refreshed from the split and, if it
 *       is non-empty, a line iterator is opened on {@code locations[location]}.</li>
 * </ul>
 *
 * @param location index into {@code locations}; only read in the last case
 * @return the iterator for the selected source, never {@code null}
 * @throws RuntimeException wrapping the {@link IOException} if the location cannot be opened
 * @throws UnsupportedOperationException if no iterator could be created
 */
protected Iterator<String> getIterator(int location) {
    Iterator<String> lines = null;

    if (inputSplit instanceof StringSplit) {
        lines = Collections.singletonList(((StringSplit) inputSplit).getData()).listIterator();
    } else if (inputSplit instanceof InputStreamInputSplit) {
        InputStream stream = ((InputStreamInputSplit) inputSplit).getIs();
        if (stream != null) {
            lines = IOUtils.lineIterator(new InputStreamReader(stream));
        }
    } else {
        this.locations = inputSplit.locations();
        boolean hasLocations = locations != null && locations.length > 0;
        if (hasLocations) {
            InputStream opened;
            try {
                opened = locations[location].toURL().openStream();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            lines = IOUtils.lineIterator(new InputStreamReader(opened));
        }
    }

    if (lines == null) {
        throw new UnsupportedOperationException("Unknown input split: " + inputSplit);
    }
    return lines;
}
 
Example 4
Source Project: deeplearning4j   Source File: LineRecordReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks for a further element, opening the next location through
 * {@code getIterator} once the current iterator is exhausted.
 *
 * <p>The reader must have been initialized first. Moving to the next location
 * is skipped when {@code locations} is {@code null}, when the input split is a
 * {@code StringSplit}, or when {@code splitIndex} is already at the last
 * location.
 *
 * @return {@code true} if the current or newly opened iterator has another element
 */
@Override
public boolean hasNext() {
    Preconditions.checkState(initialized, "Record reader has not been initialized");

    if (iter != null && iter.hasNext()) {
        return true;
    }

    // No further location to move on to.
    if (locations == null || inputSplit instanceof StringSplit || splitIndex >= locations.length - 1) {
        return false;
    }

    splitIndex++;
    lineIndex = 0; // line count restarts for the new split
    try {
        close();
        iter = getIterator(splitIndex);
        onLocationOpen(locations[splitIndex]);
    } catch (IOException ioe) {
        // NOTE(review): logged with an empty message and then ignored; the
        // hasNext() call below uses whatever iter currently refers to.
        log.error("",ioe);
    }

    return iter.hasNext();
}
 
Example 5
/**
 * Returns the next record, moving on to the next location when the current
 * iterator has no more elements.
 *
 * <p>A progress line is logged whenever {@code count} is a multiple of 1000
 * (checked before {@code count} is incremented). Every returned record is
 * passed to {@code invokeListeners} and increments {@code lineIndex}.
 *
 * @return the next record produced by the underlying iterator
 * @throws NoSuchElementException if neither the current iterator nor a newly
 *         opened location yields another element
 */
@Override
public List<Writable> next() {
    if (count % 1000 == 0) {
        log.info((new Date()) + " Data " + count);
    }
    count++;

    if (!iter.hasNext()) {
        boolean canAdvance = !(inputSplit instanceof StringSplit) && splitIndex < locations.length - 1;
        if (!canAdvance) {
            throw new NoSuchElementException("No more elements found!");
        }

        splitIndex++;
        lineIndex = 0;
        try {
            close();
            iter = lineIterator(new InputStreamReader(locations[splitIndex].toURL().openStream()));
            onLocationOpen(locations[splitIndex]);
        } catch (IOException ioe) {
            // NOTE(review): only printed; iter is then queried as-is below.
            ioe.printStackTrace();
        }
        lineIndex = 0; // new split opened, so the line index starts over

        if (!iter.hasNext()) {
            throw new NoSuchElementException("No more elements found!");
        }
    }

    // Shared tail for both the "current iterator" and the "just advanced" paths.
    List<Writable> record = iter.next();
    invokeListeners(record);
    lineIndex++;
    return record;
}
 
Example 6
Source Project: DataVec   Source File: LineRecordReaderFunction.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Turns one string into a record by re-initializing the record reader on a
 * {@code StringSplit} of that string and reading a single record from it.
 *
 * @param s the text handed to the {@code StringSplit}
 * @return the record returned by {@code recordReader.next()}
 * @throws IllegalStateException wrapping any exception raised during initialization
 */
@Override
public List<Writable> apply(String s) {
    try {
        recordReader.initialize(new StringSplit(s));
    } catch (Exception initFailure) {
        // Surface initialization problems as unchecked, keeping the cause.
        throw new IllegalStateException(initFailure);
    }

    final List<Writable> record = recordReader.next();
    return record;
}
 
Example 7
Source Project: DataVec   Source File: CSVRecordReaderTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads a single CSV line containing many empty fields and checks that 23
 * entries come back and that the last entry has length 1.
 */
@Test
public void testNext() throws Exception {
    CSVRecordReader csvReader = new CSVRecordReader();
    final String csvLine = "1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,1";
    csvReader.initialize(new StringSplit(csvLine));

    while (csvReader.hasNext()) {
        List<Writable> record = csvReader.next();
        List<Writable> copy = new ArrayList<>(record);

        assertEquals("Entry count", 23, record.size());

        // The final field is "1", so its text length must be exactly one.
        Text tail = (Text) copy.get(copy.size() - 1);
        assertEquals("Last entry garbage", 1, tail.getLength());
    }
}
 
Example 8
Source Project: DataVec   Source File: CSVRecordReaderTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a CSV line ending in empty fields is still read as 23 entries.
 */
@Test
public void testEmptyEntries() throws Exception {
    CSVRecordReader csvReader = new CSVRecordReader();
    final String csvLine = "1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,";
    csvReader.initialize(new StringSplit(csvLine));

    while (csvReader.hasNext()) {
        List<Writable> record = csvReader.next();
        assertEquals("Entry count", 23, record.size());
    }
}
 
Example 9
Source Project: DataVec   Source File: CSVRecordReaderTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks quoted-field handling: a quoted field containing the delimiter stays
 * one entry, and a quoted pair of doubled quotes is read as a single quote.
 */
@Test
public void testWithQuotes() throws Exception {
    final String[] expected = {"1", "0", "3", "Braund, Mr. Owen Harris", "male", "\""};

    CSVRecordReader csvReader = new CSVRecordReader(0, ',', '\"');
    csvReader.initialize(new StringSplit("1,0,3,\"Braund, Mr. Owen Harris\",male,\"\"\"\""));

    while (csvReader.hasNext()) {
        List<Writable> record = csvReader.next();
        assertEquals("Entry count", expected.length, record.size());

        // Compare field by field, in column order.
        for (int col = 0; col < expected.length; col++) {
            assertEquals(expected[col], record.get(col).toString());
        }
    }
}
 
Example 10
Source Project: DataVec   Source File: CSVRecordReaderTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a per-column regex splits the second CSV column into its three
 * capture groups, giving four entries in total.
 */
@Test
public void testRegex() throws Exception {
    final String[] columnPatterns = {null, "(.+) (.+) (.+)"};
    final String[] expected = {"normal", "1.2.3.4", "space", "separator"};

    CSVRecordReader csvReader = new CSVRegexRecordReader(0, ",", null, columnPatterns);
    csvReader.initialize(new StringSplit("normal,1.2.3.4 space separator"));

    while (csvReader.hasNext()) {
        List<Writable> record = csvReader.next();
        assertEquals("Entry count", expected.length, record.size());

        for (int col = 0; col < expected.length; col++) {
            assertEquals(expected[col], record.get(col).toString());
        }
    }
}
 
Example 11
Source Project: DataVec   Source File: StringToWritablesFunction.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses one string into a list of writables by initializing the record
 * reader on a {@code StringSplit} of it and reading one record.
 *
 * @param s the text handed to the {@code StringSplit}
 * @return the record itself when it already is a {@link List}, otherwise a
 *         new {@link ArrayList} holding its elements
 * @throws Exception whatever initialization or reading throws
 */
@Override
public List<Writable> call(String s) throws Exception {
    recordReader.initialize(new StringSplit(s));
    Collection<Writable> record = recordReader.next();

    // Reuse the instance when possible; copy only if it is some other collection type.
    return (record instanceof List)
            ? (List<Writable>) record
            : new ArrayList<Writable>(record);
}
 
Example 12
Source Project: deeplearning4j   Source File: LineRecordReaderFunction.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a single string into a record: the record reader is initialized on
 * a {@code StringSplit} built from the string, then one record is read.
 *
 * @param s the text handed to the {@code StringSplit}
 * @return the record returned by {@code recordReader.next()}
 * @throws IllegalStateException wrapping any exception raised while initializing
 */
@Override
public List<Writable> apply(String s) {
    try {
        recordReader.initialize(new StringSplit(s));
    } catch (Exception cause) {
        // Initialization errors are rethrown unchecked with the original cause attached.
        throw new IllegalStateException(cause);
    }

    final List<Writable> parsed = recordReader.next();
    return parsed;
}
 
Example 13
Source Project: deeplearning4j   Source File: LineRecordReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the reader on the given split.
 *
 * <p>After delegating to the superclass, the split's location iterator is
 * drained into {@code locations} unless the split is a {@code StringSplit} or
 * an {@code InputStreamInputSplit}. The iterator for index 0 is then opened
 * and the reader is marked as initialized.
 *
 * @param split the input split to read from
 * @throws IOException as declared by the overridden method
 * @throws InterruptedException as declared by the overridden method
 */
@Override
public void initialize(InputSplit split) throws IOException, InterruptedException {
    super.initialize(split);

    final boolean skipsLocationScan =
            inputSplit instanceof StringSplit || inputSplit instanceof InputStreamInputSplit;
    if (!skipsLocationScan) {
        final ArrayList<URI> collected = new ArrayList<>();
        for (Iterator<URI> it = inputSplit.locationsIterator(); it.hasNext(); ) {
            collected.add(it.next());
        }
        this.locations = collected.toArray(new URI[0]);
    }

    this.iter = getIterator(0);
    this.initialized = true;
}
 
Example 14
Source Project: deeplearning4j   Source File: CSVRecordReaderTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses one CSV line with many blank fields and verifies both the entry
 * count (23) and that the last entry has a length of 1.
 */
@Test
public void testNext() throws Exception {
    CSVRecordReader csvReader = new CSVRecordReader();
    final String input = "1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,1";
    csvReader.initialize(new StringSplit(input));

    while (csvReader.hasNext()) {
        List<Writable> row = csvReader.next();
        List<Writable> rowCopy = new ArrayList<>(row);

        assertEquals("Entry count", 23, row.size());

        // The trailing field is the single character "1".
        Text finalField = (Text) rowCopy.get(rowCopy.size() - 1);
        assertEquals("Last entry garbage", 1, finalField.getLength());
    }
}
 
Example 15
Source Project: deeplearning4j   Source File: CSVRecordReaderTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that trailing empty fields are kept: the line must still be read
 * as 23 entries.
 */
@Test
public void testEmptyEntries() throws Exception {
    CSVRecordReader csvReader = new CSVRecordReader();
    final String input = "1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,";
    csvReader.initialize(new StringSplit(input));

    while (csvReader.hasNext()) {
        List<Writable> row = csvReader.next();
        assertEquals("Entry count", 23, row.size());
    }
}
 
Example 16
Source Project: deeplearning4j   Source File: CSVRecordReaderTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies quote handling: the quoted name containing a comma stays a single
 * entry and the quoted, doubled quote characters are read back as one quote.
 */
@Test
public void testWithQuotes() throws Exception {
    final String[] wanted = {"1", "0", "3", "Braund, Mr. Owen Harris", "male", "\""};

    CSVRecordReader csvReader = new CSVRecordReader(0, ',', '\"');
    csvReader.initialize(new StringSplit("1,0,3,\"Braund, Mr. Owen Harris\",male,\"\"\"\""));

    while (csvReader.hasNext()) {
        List<Writable> row = csvReader.next();
        assertEquals("Entry count", wanted.length, row.size());

        // Walk the columns in order and compare their string form.
        for (int i = 0; i < wanted.length; i++) {
            assertEquals(wanted[i], row.get(i).toString());
        }
    }
}
 
Example 17
Source Project: deeplearning4j   Source File: CSVRecordReaderTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that the regex configured for the second column breaks it into
 * three captured values, so the line is read as four entries.
 */
@Test
public void testRegex() throws Exception {
    final String[] regexPerColumn = {null, "(.+) (.+) (.+)"};
    final String[] wanted = {"normal", "1.2.3.4", "space", "separator"};

    CSVRecordReader csvReader = new CSVRegexRecordReader(0, ",", null, regexPerColumn);
    csvReader.initialize(new StringSplit("normal,1.2.3.4 space separator"));

    while (csvReader.hasNext()) {
        List<Writable> row = csvReader.next();
        assertEquals("Entry count", wanted.length, row.size());

        for (int i = 0; i < wanted.length; i++) {
            assertEquals(wanted[i], row.get(i).toString());
        }
    }
}
 
Example 18
Source Project: deeplearning4j   Source File: StringToWritablesFunction.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts one string into a list of writables: the record reader is
 * initialized on a {@code StringSplit} of the string and one record is read.
 *
 * @param s the text handed to the {@code StringSplit}
 * @return the record as-is if it is already a {@link List}; otherwise its
 *         elements copied into a new {@link ArrayList}
 * @throws Exception whatever initialization or reading throws
 */
@Override
public List<Writable> call(String s) throws Exception {
    recordReader.initialize(new StringSplit(s));
    Collection<Writable> parsed = recordReader.next();

    // Avoid a copy when the reader already produced a List.
    return (parsed instanceof List)
            ? (List<Writable>) parsed
            : new ArrayList<Writable>(parsed);
}
 
Example 19
/**
 * Parses every string from the iterator into a record and hands the growing
 * list to {@code processBatchIfRequired} after each one.
 *
 * <p>Before parsing, {@code uid} is set to the current thread id followed by
 * at most the first 8 characters of the JVM UID.
 *
 * @param stringIterator source of the strings to parse, one record per string
 * @throws Exception whatever the record reader or batch processing throws
 */
@Override
public void call(Iterator<String> stringIterator) throws Exception {
    String jvmUid = UIDProvider.getJVMUID();
    int prefixLength = Math.min(8, jvmUid.length());
    uid = Thread.currentThread().getId() + jvmUid.substring(0, prefixLength);

    List<List<Writable>> pending = new ArrayList<>(batchSize);

    while (stringIterator.hasNext()) {
        String line = stringIterator.next();
        recordReader.initialize(new StringSplit(line));
        pending.add(recordReader.next());

        // The flag is true exactly when this was the last string from the iterator.
        boolean exhausted = !stringIterator.hasNext();
        processBatchIfRequired(pending, exhausted);
    }
}
 
Example 20
Source Project: DataVec   Source File: LineRecordReaderFunction.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Parses one string into a record by initializing the record reader on a
 * {@code StringSplit} of it and reading a single record.
 *
 * @param s the text handed to the {@code StringSplit}
 * @return the record returned by {@code recordReader.next()}
 * @throws Exception whatever initialization or reading throws
 */
@Override
public List<Writable> call(String s) throws Exception {
    final StringSplit split = new StringSplit(s);
    recordReader.initialize(split);

    final List<Writable> record = recordReader.next();
    return record;
}
 
Example 21
Source Project: deeplearning4j   Source File: LineRecordReaderFunction.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads a single record out of the given string: the record reader is pointed
 * at a {@code StringSplit} wrapping the string and asked for its next record.
 *
 * @param s the text handed to the {@code StringSplit}
 * @return the record returned by {@code recordReader.next()}
 * @throws Exception whatever initialization or reading throws
 */
@Override
public List<Writable> call(String s) throws Exception {
    final StringSplit singleLine = new StringSplit(s);
    recordReader.initialize(singleLine);

    final List<Writable> parsed = recordReader.next();
    return parsed;
}