org.datavec.api.split.StringSplit Java Examples

The following examples show how to use org.datavec.api.split.StringSplit. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FeatureRecordReader.java    From FancyBing with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Reports whether another line can be read, moving on to the next split location
 * when the current iterator is missing or exhausted.
 *
 * <p>This method is not a pure query: when it advances, it increments {@code splitIndex},
 * resets {@code lineIndex}, closes the current source and opens the next location.
 * Only one location is tried per call, and a {@link StringSplit} input is never advanced.
 *
 * @return {@code true} if the current (possibly newly opened) iterator has another line
 */
@Override
public boolean hasNext() {
    if (iter != null && iter.hasNext()) {
        return true;
    }

    boolean canAdvance = locations != null
            && !(inputSplit instanceof StringSplit)
            && splitIndex < locations.length - 1;
    if (!canAdvance) {
        return false;
    }

    splitIndex++;
    lineIndex = 0; //New split -> reset line count
    try {
        close();
        iter = lineIterator(new InputStreamReader(locations[splitIndex].toURL().openStream()));
        onLocationOpen(locations[splitIndex]);
    } catch (IOException e) {
        // Best-effort behavior retained: the failure is reported and the reader carries on.
        e.printStackTrace();
    }

    // If opening failed before any iterator was ever assigned, iter is still null here;
    // report "no more data" instead of failing with a NullPointerException.
    return iter != null && iter.hasNext();
}
 
Example #2
Source File: LineRecordReader.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Checks for a further line, switching to the next location of the input split
 * once the current line iterator has nothing left.
 *
 * <p>Switching has side effects: {@code splitIndex} is incremented, {@code lineIndex}
 * is reset to 0, the current source is closed and the next location's stream is opened.
 * At most one location is opened per call; {@link StringSplit} input is never switched.
 *
 * @return {@code true} when a line is available from the active iterator
 */
@Override
public boolean hasNext() {
    if (iter != null && iter.hasNext()) {
        return true;
    }

    if (locations != null && !(inputSplit instanceof StringSplit) && splitIndex < locations.length - 1) {
        splitIndex++;
        lineIndex = 0; //New split -> reset line count
        try {
            close();
            iter = IOUtils.lineIterator(new InputStreamReader(locations[splitIndex].toURL().openStream()));
            onLocationOpen(locations[splitIndex]);
        } catch (IOException e) {
            // Existing best-effort handling kept: report and continue.
            e.printStackTrace();
        }

        // iter can still be null when the open above failed and no iterator existed before;
        // answer false rather than throwing a NullPointerException.
        return iter != null && iter.hasNext();
    }

    return false;
}
 
Example #3
Source File: LineRecordReader.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the line iterator that matches the type of the configured input split.
 *
 * <ul>
 *   <li>{@link StringSplit}: a single-element iterator over the split's data.</li>
 *   <li>{@link InputStreamInputSplit}: a line iterator over the split's stream, if it has one.</li>
 *   <li>Anything else: the split's locations are stored in {@code locations} and the
 *       location at index {@code location} is opened as a URL stream.</li>
 * </ul>
 *
 * @param location index into the split's locations; only used for location-based splits
 * @return the iterator for the selected source, never {@code null}
 * @throws RuntimeException if the location's stream cannot be opened
 * @throws UnsupportedOperationException if no iterator could be created
 */
protected Iterator<String> getIterator(int location) {
    Iterator<String> lines = null;

    if (inputSplit instanceof StringSplit) {
        lines = Collections.singletonList(((StringSplit) inputSplit).getData()).listIterator();
    } else if (inputSplit instanceof InputStreamInputSplit) {
        InputStream stream = ((InputStreamInputSplit) inputSplit).getIs();
        if (stream != null) {
            lines = IOUtils.lineIterator(new InputStreamReader(stream));
        }
    } else {
        this.locations = inputSplit.locations();
        boolean hasLocations = locations != null && locations.length > 0;
        if (hasLocations) {
            InputStream opened;
            try {
                opened = locations[location].toURL().openStream();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            lines = IOUtils.lineIterator(new InputStreamReader(opened));
        }
    }

    if (lines != null) {
        return lines;
    }
    throw new UnsupportedOperationException("Unknown input split: " + inputSplit);
}
 
Example #4
Source File: LineRecordReader.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Tells whether another line is available, opening the next location of the
 * input split when the current iterator is exhausted.
 *
 * <p>Advancing increments {@code splitIndex}, resets {@code lineIndex}, closes the
 * current source and obtains a fresh iterator via {@code getIterator(splitIndex)}.
 * A {@link StringSplit} input is never advanced.
 *
 * @return {@code true} if the active iterator has a further line
 * @throws IllegalStateException presumably, via {@code Preconditions.checkState}, when the
 *         reader has not been initialized - confirm against the Preconditions implementation
 */
@Override
public boolean hasNext() {
    Preconditions.checkState(initialized, "Record reader has not been initialized");

    if (iter != null && iter.hasNext()) {
        return true;
    }

    boolean moreSplits = locations != null
            && !(inputSplit instanceof StringSplit)
            && splitIndex < locations.length - 1;
    if (!moreSplits) {
        return false;
    }

    splitIndex++;
    lineIndex = 0; // a new split starts counting lines from zero again
    try {
        close();
        iter = getIterator(splitIndex);
        onLocationOpen(locations[splitIndex]);
    } catch (IOException e) {
        log.error("",e);
    }
    return iter.hasNext();
}
 
Example #5
Source File: StringToDataSetExportFunction.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Converts every string from the iterator into a record and hands the growing
 * batch to {@code processBatchIfRequired} after each one.
 *
 * <p>Before processing, {@code uid} is set to the current thread id followed by
 * up to the first 8 characters of the JVM UID.
 *
 * @param stringIterator source of raw strings, one record per string
 * @throws Exception if initializing the reader or processing a batch fails
 */
@Override
public void call(Iterator<String> stringIterator) throws Exception {
    final String jvmUid = UIDProvider.getJVMUID();
    final int prefixLength = Math.min(8, jvmUid.length());
    uid = Thread.currentThread().getId() + jvmUid.substring(0, prefixLength);

    final List<List<Writable>> pending = new ArrayList<>(batchSize);

    while (stringIterator.hasNext()) {
        final String line = stringIterator.next();
        recordReader.initialize(new StringSplit(line));
        pending.add(recordReader.next());

        // The second argument is true only for the final element of the iterator.
        processBatchIfRequired(pending, !stringIterator.hasNext());
    }
}
 
Example #6
Source File: FeatureRecordReader.java    From FancyBing with GNU General Public License v3.0 5 votes vote down vote up
@Override
public List<Writable> next() {
    List<Writable> ret = new ArrayList<>();
    if (count % 1000 == 0) {
    	log.info((new Date()) + " Data " + count);
    }
    count++;
    if (iter.hasNext()) {
        ret = iter.next();
        invokeListeners(ret);
        lineIndex++;
        return ret;
    } else {
        if (!(inputSplit instanceof StringSplit) && splitIndex < locations.length - 1) {
            splitIndex++;
            lineIndex = 0;
            try {
                close();
                iter = lineIterator(new InputStreamReader(locations[splitIndex].toURL().openStream()));
                onLocationOpen(locations[splitIndex]);
            } catch (IOException e) {
                e.printStackTrace();
            }
            lineIndex = 0; //New split opened -> reset line index

            if (iter.hasNext()) {
                ret = iter.next();
                invokeListeners(ret);
                lineIndex++;
                return ret;
            }
        }

        throw new NoSuchElementException("No more elements found!");
    }
}
 
Example #7
Source File: StringToWritablesFunction.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a single string into a record by re-initializing the record reader
 * on a {@link StringSplit} wrapping that string and reading one record.
 *
 * @param s the raw text of one record
 * @return the record as a list; the reader's own collection is returned as-is
 *         when it already is a {@code List}, otherwise it is copied into a new list
 * @throws Exception if the reader cannot be initialized
 */
@Override
public List<Writable> call(String s) throws Exception {
    recordReader.initialize(new StringSplit(s));
    Collection<Writable> record = recordReader.next();
    return (record instanceof List) ? (List<Writable>) record : new ArrayList<>(record);
}
 
Example #8
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one comma-separated line whose second column carries the regex
 * {@code (.+) (.+) (.+)} and checks that the record ends up with four entries:
 * the untouched first column plus the three space-separated parts of the second.
 */
@Test
public void testRegex() throws Exception {
    CSVRecordReader reader = new CSVRegexRecordReader(0, ",", null, new String[] {null, "(.+) (.+) (.+)"});
    reader.initialize(new StringSplit("normal,1.2.3.4 space separator"));
    int records = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        records++;
        assertEquals("Entry count", 4, vals.size());
        assertEquals("normal", vals.get(0).toString());
        assertEquals("1.2.3.4", vals.get(1).toString());
        assertEquals("space", vals.get(2).toString());
        assertEquals("separator", vals.get(3).toString());
    }
    // Without this check the test would pass even if the reader produced no record at all.
    assertEquals("Record count", 1, records);
}
 
Example #9
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one line containing quoted fields and checks that a comma inside quotes
 * stays part of its field and that the quoted field {@code """"} yields a single
 * quote character.
 */
@Test
public void testWithQuotes() throws Exception {
    CSVRecordReader reader = new CSVRecordReader(0, ',', '\"');
    reader.initialize(new StringSplit("1,0,3,\"Braund, Mr. Owen Harris\",male,\"\"\"\""));
    int records = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        records++;
        assertEquals("Entry count", 6, vals.size());
        assertEquals("1", vals.get(0).toString());
        assertEquals("0", vals.get(1).toString());
        assertEquals("3", vals.get(2).toString());
        assertEquals("Braund, Mr. Owen Harris", vals.get(3).toString());
        assertEquals("male", vals.get(4).toString());
        assertEquals("\"", vals.get(5).toString());
    }
    // Without this check the test would pass even if the reader produced no record at all.
    assertEquals("Record count", 1, records);
}
 
Example #10
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one line with many empty columns, including trailing ones, and checks
 * that all 23 entries are reported.
 */
@Test
public void testEmptyEntries() throws Exception {
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit("1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,"));
    int records = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        records++;
        assertEquals("Entry count", 23, vals.size());
    }
    // Without this check the test would pass even if the reader produced no record at all.
    assertEquals("Record count", 1, records);
}
 
Example #11
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one line with many empty columns followed by a final {@code 1} and checks
 * that 23 entries are reported and that the last entry has length 1.
 */
@Test
public void testNext() throws Exception {
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit("1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,1"));
    int records = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        records++;

        assertEquals("Entry count", 23, vals.size());
        // Read the last entry straight from the record; no defensive copy is needed.
        Text lastEntry = (Text) vals.get(vals.size() - 1);
        assertEquals("Last entry garbage", 1, lastEntry.getLength());
    }
    // Without this check the test would pass even if the reader produced no record at all.
    assertEquals("Record count", 1, records);
}
 
Example #12
Source File: LineRecordReader.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the reader for the given split.
 *
 * <p>For splits other than {@link StringSplit} and {@link InputStreamInputSplit},
 * all locations reported by the split's iterator are collected into {@code locations}.
 * In every case the iterator for index 0 is then created and the reader is flagged
 * as initialized.
 *
 * @param split the input split to read from
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit split) throws IOException, InterruptedException {
    super.initialize(split);

    boolean inMemorySplit = inputSplit instanceof StringSplit || inputSplit instanceof InputStreamInputSplit;
    if (!inMemorySplit) {
        final ArrayList<URI> collected = new ArrayList<>();
        for (Iterator<URI> it = inputSplit.locationsIterator(); it.hasNext(); ) {
            collected.add(it.next());
        }
        this.locations = collected.toArray(new URI[0]);
    }

    this.iter = getIterator(0);
    this.initialized = true;
}
 
Example #13
Source File: LineRecordReaderFunction.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Turns one string into a record: the record reader is re-initialized on a
 * {@link StringSplit} wrapping the string and its next record is returned.
 *
 * @param s the raw text of one record
 * @return the record read from {@code s}
 * @throws IllegalStateException wrapping any exception thrown while initializing the reader
 */
@Override
public List<Writable> apply(String s) {
    try {
        StringSplit singleLine = new StringSplit(s);
        recordReader.initialize(singleLine);
    } catch (Exception initFailure) {
        throw new IllegalStateException(initFailure);
    }
    return recordReader.next();
}
 
Example #14
Source File: StringToWritablesFunction.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Reads one record out of the given string using the record reader,
 * re-initialized on a {@link StringSplit} for that string.
 *
 * @param s the raw text of one record
 * @return the reader's record if it already is a {@code List}, otherwise a new
 *         list holding the same elements
 * @throws Exception if the reader cannot be initialized
 */
@Override
public List<Writable> call(String s) throws Exception {
    recordReader.initialize(new StringSplit(s));
    Collection<Writable> parsed = recordReader.next();
    if (!(parsed instanceof List)) {
        return new ArrayList<>(parsed);
    }
    return (List<Writable>) parsed;
}
 
Example #15
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a line whose second column is configured with the regex
 * {@code (.+) (.+) (.+)} is read as four entries: the first column unchanged,
 * followed by the three space-separated parts of the second column.
 */
@Test
public void testRegex() throws Exception {
    CSVRecordReader reader = new CSVRegexRecordReader(0, ",", null, new String[] {null, "(.+) (.+) (.+)"});
    reader.initialize(new StringSplit("normal,1.2.3.4 space separator"));

    String[] expected = {"normal", "1.2.3.4", "space", "separator"};
    int recordCount = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        recordCount++;
        assertEquals("Entry count", 4, vals.size());
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], vals.get(i).toString());
        }
    }
    // A reader that yields nothing must fail the test rather than skip every assertion.
    assertEquals("Record count", 1, recordCount);
}
 
Example #16
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Checks quote handling on a single line: a quoted field keeps its embedded comma,
 * and the quoted field {@code """"} is read as one quote character.
 */
@Test
public void testWithQuotes() throws Exception {
    CSVRecordReader reader = new CSVRecordReader(0, ',', '\"');
    reader.initialize(new StringSplit("1,0,3,\"Braund, Mr. Owen Harris\",male,\"\"\"\""));

    String[] expected = {"1", "0", "3", "Braund, Mr. Owen Harris", "male", "\""};
    int recordCount = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        recordCount++;
        assertEquals("Entry count", 6, vals.size());
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], vals.get(i).toString());
        }
    }
    // A reader that yields nothing must fail the test rather than skip every assertion.
    assertEquals("Record count", 1, recordCount);
}
 
Example #17
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that empty columns, including the trailing ones, are all counted:
 * the single input line must be read as 23 entries.
 */
@Test
public void testEmptyEntries() throws Exception {
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit("1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,"));

    int recordCount = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        recordCount++;
        assertEquals("Entry count", 23, vals.size());
    }
    // A reader that yields nothing must fail the test rather than skip every assertion.
    assertEquals("Record count", 1, recordCount);
}
 
Example #18
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a line of mostly empty columns ending in {@code 1} is read as
 * 23 entries and that the final entry has length 1.
 */
@Test
public void testNext() throws Exception {
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit("1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,1"));

    int recordCount = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        recordCount++;

        assertEquals("Entry count", 23, vals.size());
        // The last entry is taken directly from the record; copying the list first adds nothing.
        Text lastEntry = (Text) vals.get(vals.size() - 1);
        assertEquals("Last entry garbage", 1, lastEntry.getLength());
    }
    // A reader that yields nothing must fail the test rather than skip every assertion.
    assertEquals("Record count", 1, recordCount);
}
 
Example #19
Source File: LineRecordReaderFunction.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a single string into a record by pointing the record reader at a
 * {@link StringSplit} for that string and reading one record from it.
 *
 * @param s the raw text of one record
 * @return the record read from {@code s}
 * @throws IllegalStateException wrapping any exception raised during reader initialization
 */
@Override
public List<Writable> apply(String s) {
    try {
        recordReader.initialize(new StringSplit(s));
    } catch (Exception cause) {
        // Checked failures are rethrown unchecked because apply() declares no exceptions.
        throw new IllegalStateException(cause);
    }

    List<Writable> record = recordReader.next();
    return record;
}
 
Example #20
Source File: LineRecordReaderFunction.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Re-initializes the record reader on a {@link StringSplit} wrapping the given
 * string and returns the reader's next record.
 *
 * @param s the raw text of one record
 * @return the record read from {@code s}
 * @throws Exception if the reader cannot be initialized
 */
@Override
public List<Writable> call(String s) throws Exception {
    StringSplit singleLine = new StringSplit(s);
    recordReader.initialize(singleLine);
    List<Writable> record = recordReader.next();
    return record;
}
 
Example #21
Source File: LineRecordReaderFunction.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Converts one string into a record: the record reader is initialized on a
 * {@link StringSplit} built from the string, then asked for its next record.
 *
 * @param s the raw text of one record
 * @return the record read from {@code s}
 * @throws Exception if the reader cannot be initialized
 */
@Override
public List<Writable> call(String s) throws Exception {
    final StringSplit input = new StringSplit(s);
    recordReader.initialize(input);
    final List<Writable> parsed = recordReader.next();
    return parsed;
}