Java Code Examples for org.datavec.api.split.FileSplit

The following examples show how to use org.datavec.api.split.FileSplit. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: deeplearning4j   Source File: SvhnDataFetcher.java    License: Apache License 2.0 7 votes vote down vote up
@Override
public RecordReader getRecordReader(long rngSeed, int[] imgDim, DataSetType set, ImageTransform imageTransform) {
    // Builds an object-detection record reader over the locally cached SVHN
    // images for the requested split, shuffled with a seeded RNG so iteration
    // order is reproducible.
    try {
        File datasetPath = getDataSetPath(set);
        Random shuffleRng = new Random(rngSeed);
        FileSplit imageSplit = new FileSplit(datasetPath, BaseImageLoader.ALLOWED_FORMATS, shuffleRng);

        // NOTE(review): indices (1,0) and (4,3) swap pairs of imgDim entries —
        // presumably width/height and grid dimensions; confirm against callers.
        ObjectDetectionRecordReader recordReader = new ObjectDetectionRecordReader(
                imgDim[1], imgDim[0], imgDim[2], imgDim[4], imgDim[3], null);
        recordReader.initialize(imageSplit);
        return recordReader;
    } catch (IOException e) {
        throw new RuntimeException("Could not download SVHN", e);
    }
}
 
Example 2
Source Project: DataVec   Source File: FileRecordReaderTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReset() throws Exception {
    // The single iris.dat file must yield exactly one record (of one writable)
    // on every pass, including after each reset().
    FileRecordReader reader = new FileRecordReader();
    reader.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    final int passes = 5;
    for (int pass = 0; pass < passes; pass++) {
        int recordCount = 0;
        while (reader.hasNext()) {
            List<Writable> record = reader.next();
            assertEquals(1, record.size());
            recordCount++;
        }
        assertFalse(reader.hasNext());
        assertEquals(1, recordCount);
        reader.reset();
    }
}
 
Example 3
Source Project: deeplearning4j   Source File: CodecReaderTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testCodecReaderMeta() throws Exception {
    // Decode 500 frames from a low-res video, then verify the record metadata
    // round-trips: loading by metadata must reproduce the same sequence.
    File videoFile = new ClassPathResource("datavec-data-codec/fire_lowres.mp4").getFile();
    SequenceRecordReader codecReader = new CodecRecordReader();

    Configuration configuration = new Configuration();
    configuration.set(CodecRecordReader.RAVEL, "true");
    configuration.set(CodecRecordReader.START_FRAME, "160");
    configuration.set(CodecRecordReader.TOTAL_FRAMES, "500");
    configuration.set(CodecRecordReader.ROWS, "80");
    configuration.set(CodecRecordReader.COLUMNS, "46");

    // Call order preserved from the original: initialize() first, then setConf().
    codecReader.initialize(new FileSplit(videoFile));
    codecReader.setConf(configuration);

    assertTrue(codecReader.hasNext());
    List<List<Writable>> frames = codecReader.sequenceRecord();
    assertEquals(500, frames.size()); // one entry per decoded frame

    codecReader.reset();
    SequenceRecord sequence = codecReader.nextSequence();
    assertEquals(frames, sequence.getSequenceRecord());

    RecordMetaData metaData = sequence.getMetaData();
    assertTrue(metaData.getURI().toString().endsWith(videoFile.getName()));

    SequenceRecord reloaded = codecReader.loadSequenceFromMetaData(metaData);
    assertEquals(sequence, reloaded);
}
 
Example 4
Source Project: Java-Deep-Learning-Cookbook   Source File: HyperParameterTuning.java    License: MIT License 6 votes vote down vote up
public RecordReader dataPreprocess() throws IOException, InterruptedException {
    // Describe the raw churn CSV layout so the transform process can be
    // validated against it.
    Schema schema = new Schema.Builder()
            .addColumnsString("RowNumber")
            .addColumnInteger("CustomerId")
            .addColumnString("Surname")
            .addColumnInteger("CreditScore")
            .addColumnCategorical("Geography", Arrays.asList("France","Spain","Germany"))
            .addColumnCategorical("Gender", Arrays.asList("Male","Female"))
            .addColumnsInteger("Age","Tenure","Balance","NumOfProducts","HasCrCard","IsActiveMember","EstimatedSalary","Exited")
            .build();

    // Drop identifier columns, integer-encode Gender, one-hot Geography, and
    // remove one of the resulting one-hot columns (Geography[France]).
    TransformProcess transformProcess = new TransformProcess.Builder(schema)
            .removeColumns("RowNumber","Surname","CustomerId")
            .categoricalToInteger("Gender")
            .categoricalToOneHot("Geography")
            .removeColumns("Geography[France]")
            .build();

    // Read the CSV (skipping the one header line) and wrap it so the
    // transformation is applied as records are consumed.
    RecordReader csvReader = new CSVRecordReader(1, ',');
    csvReader.initialize(new FileSplit(new ClassPathResource("Churn_Modelling.csv").getFile()));
    return new TransformProcessRecordReader(csvReader, transformProcess);
}
 
Example 5
@Test(timeout = 60000)
public void testArrowBinary() throws Exception {
    // Write 150 iris records to an Arrow file, then feed the raw bytes through
    // ArrowBinaryInputAdapter and check that no records were lost.
    Schema irisInputSchema = TrainUtils.getIrisInputSchema();
    ArrowRecordWriter arrowWriter = new ArrowRecordWriter(irisInputSchema);

    CSVRecordReader csvReader = new CSVRecordReader();
    csvReader.initialize(new FileSplit(new ClassPathResource("iris.txt").getFile()));
    List<List<Writable>> records = csvReader.next(150);

    File arrowFile = new File(temporary.getRoot(), "tmp.arrow");
    arrowWriter.initialize(new FileSplit(arrowFile), new NumberOfRecordsPartitioner());
    arrowWriter.writeBatch(records);

    byte[] arrowBytes = FileUtils.readFileToByteArray(arrowFile);
    Buffer buffer = Buffer.buffer(arrowBytes);
    ArrowWritableRecordBatch roundTripped = new ArrowBinaryInputAdapter()
            .convert(buffer, ConverterArgs.builder().schema(irisInputSchema).build(), null);
    assertEquals(records.size(), roundTripped.size());
}
 
Example 6
Source Project: deeplearning4j   Source File: MultipleEpochsIteratorTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testNextAndReset() throws Exception {
    // Wrapping a single-pass iterator (150 iris rows in one batch) in
    // MultipleEpochsIterator must allow it to be consumed `epochs` times.
    int epochs = 3;

    RecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(Resources.asFile("iris.txt")));
    DataSetIterator iter = new RecordReaderDataSetIterator(rr, 150);
    MultipleEpochsIterator multiIter = new MultipleEpochsIterator(epochs, iter);

    assertTrue(multiIter.hasNext());
    while (multiIter.hasNext()) {
        DataSet batch = multiIter.next();
        // Was assertFalse(batch == null): use the dedicated assertion for a
        // clearer failure message.
        assertNotNull(batch);
    }
    assertEquals(epochs, multiIter.epochs);
}
 
Example 7
Source Project: DataVec   Source File: LFWLoader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Ensures the LFW image files are present locally (downloading and untarring
 * them if necessary), then builds a balanced, sampled train/test split over
 * the image directory, storing the result in {@code inputSplit}.
 *
 * @param batchSize       batch size passed to the BalancedPathFilter
 * @param numExamples     total number of examples to sample
 * @param numLabels       number of label classes for the path filter
 * @param labelGenerator  derives a label from each image path
 * @param splitTrainTest  fraction of examples assigned to the train portion
 * @param rng             RNG used for both file shuffling and path filtering
 */
public void load(int batchSize, int numExamples, int numLabels, PathLabelGenerator labelGenerator,
                double splitTrainTest, Random rng) {
    if (!imageFilesExist()) {
        // Download only when the target directory is missing or empty.
        // NOTE(review): listFiles() is called twice and may return null on I/O
        // error; the second call's result is only used for the length check.
        if (!fullDir.exists() || fullDir.listFiles() == null || fullDir.listFiles().length == 0) {
            fullDir.mkdir();

            if (useSubset) {
                log.info("Downloading {} subset...", localDir);
                downloadAndUntar(lfwSubsetData, fullDir);
            } else {
                // Full dataset ships images and labels as separate archives.
                log.info("Downloading {}...", localDir);
                downloadAndUntar(lfwData, fullDir);
                downloadAndUntar(lfwLabel, fullDir);
            }
        }
    }
    // Shuffled split over all allowed image formats, balanced per label, then
    // sampled into train/test by the requested fraction.
    FileSplit fileSplit = new FileSplit(fullDir, ALLOWED_FORMATS, rng);
    BalancedPathFilter pathFilter = new BalancedPathFilter(rng, ALLOWED_FORMATS, labelGenerator, numExamples,
                    numLabels, 0, batchSize, null);
    inputSplit = fileSplit.sample(pathFilter, numExamples * splitTrainTest, numExamples * (1 - splitTrainTest));
}
 
Example 8
Source Project: DataVec   Source File: CSVRecordReaderTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test(expected = NoSuchElementException.class)
public void testCsvSkipAllLines() throws IOException, InterruptedException {
    // A CSVRecordReader told to skip as many lines as the file contains must
    // report no records and throw NoSuchElementException on next().
    final int numLines = 4;
    String header = ",one,two,three";
    List<String> lines = new ArrayList<>();
    for (int i = 0; i < numLines; i++)
        lines.add(Integer.toString(i) + header);
    File tempFile = File.createTempFile("csvSkipLines", ".csv");
    tempFile.deleteOnExit(); // clean up like the other temp-file tests in this suite
    FileUtils.writeLines(tempFile, lines);

    // Removed the unused `lineList` local from the original — it was never read.
    CSVRecordReader rr = new CSVRecordReader(numLines, ',');
    rr.initialize(new FileSplit(tempFile));
    rr.reset();
    assertFalse(rr.hasNext()); // was assertTrue(!...) — clearer assertion
    rr.next(); // past the end — expected to throw
}
 
Example 9
Source Project: DataVec   Source File: TfidfRecordReaderTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadRecordFromMetaData() throws Exception {
    // A record reloaded through its RecordMetaData must match the original
    // record exactly (values and metadata).
    TfidfVectorizer vectorizer = new TfidfVectorizer();
    Configuration conf = new Configuration();
    conf.setInt(TfidfVectorizer.MIN_WORD_FREQUENCY, 1);
    conf.setBoolean(RecordReader.APPEND_LABEL, true);
    vectorizer.initialize(conf);
    TfidfRecordReader reader = new TfidfRecordReader();
    reader.initialize(conf, new FileSplit(new ClassPathResource("labeled").getFile()));

    Record record = reader.nextRecord();
    Record reread = reader.loadFromMetaData(record.getMetaData());

    // JUnit's assertEquals takes (expected, actual); the original had them
    // reversed on the size checks, producing misleading failure messages.
    assertEquals(2, record.getRecord().size());
    assertEquals(2, reread.getRecord().size());
    assertEquals(record.getRecord().get(0), reread.getRecord().get(0));
    assertEquals(record.getRecord().get(1), reread.getRecord().get(1));
    assertEquals(record.getMetaData(), reread.getMetaData());
}
 
Example 10
@Test
public void simpleTransformTest() throws Exception {
    // Removing column "0" from a 5-column schema must leave 4 writables per
    // record across all 150 iris rows, both row-by-row and as a single batch.
    Schema schema = new Schema.Builder()
            .addColumnsDouble("%d", 0, 4)
            .build();
    TransformProcess removeFirstColumn = new TransformProcess.Builder(schema).removeColumns("0").build();

    CSVRecordReader csv = new CSVRecordReader();
    csv.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));
    TransformProcessRecordReader transformed =
                    new TransformProcessRecordReader(csv, removeFirstColumn);

    List<List<Writable>> seen = new ArrayList<>();
    while (transformed.hasNext()) {
        List<Writable> row = transformed.next();
        assertEquals(4, row.size());
        seen.add(row);
    }
    assertEquals(150, seen.size());

    // Batch read after reset must reproduce the row-by-row pass exactly.
    assertTrue(transformed.resetSupported());
    transformed.reset();
    List<List<Writable>> batch = transformed.next(150);
    assertEquals(seen, batch);
}
 
Example 11
Source Project: DataVec   Source File: CodecReaderTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testCodecReaderMeta() throws Exception {
    // Decodes 500 frames from a low-res video and verifies that record
    // metadata round-trips: loading by metadata must reproduce the sequence.
    File file = new ClassPathResource("fire_lowres.mp4").getFile();
    SequenceRecordReader reader = new CodecRecordReader();
    Configuration conf = new Configuration();
    conf.set(CodecRecordReader.RAVEL, "true");
    conf.set(CodecRecordReader.START_FRAME, "160");
    conf.set(CodecRecordReader.TOTAL_FRAMES, "500");
    conf.set(CodecRecordReader.ROWS, "80");
    conf.set(CodecRecordReader.COLUMNS, "46");
    // Configuration is applied via setConf() after initialize().
    reader.initialize(new FileSplit(file));
    reader.setConf(conf);
    assertTrue(reader.hasNext());
    List<List<Writable>> record = reader.sequenceRecord();
    assertEquals(500, record.size()); //500 frames, one list entry each

    // After reset, reading the sequence via nextSequence() must match the
    // frames returned by sequenceRecord() above.
    reader.reset();
    SequenceRecord seqR = reader.nextSequence();
    assertEquals(record, seqR.getSequenceRecord());
    RecordMetaData meta = seqR.getMetaData();
    //        System.out.println(meta);
    // Metadata URI should point back at the source video file.
    assertTrue(meta.getURI().toString().endsWith("fire_lowres.mp4"));

    // Round-trip: loading from metadata must reproduce the whole sequence.
    SequenceRecord fromMeta = reader.loadSequenceFromMetaData(meta);
    assertEquals(seqR, fromMeta);
}
 
Example 12
Source Project: deeplearning4j   Source File: TfidfRecordReaderTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testReadRecordFromMetaData() throws Exception {
    // A record reloaded through its RecordMetaData must match the original
    // record exactly (values and metadata).
    TfidfVectorizer vectorizer = new TfidfVectorizer();
    Configuration conf = new Configuration();
    conf.setInt(TfidfVectorizer.MIN_WORD_FREQUENCY, 1);
    conf.setBoolean(RecordReader.APPEND_LABEL, true);
    vectorizer.initialize(conf);
    TfidfRecordReader reader = new TfidfRecordReader();
    File f = testDir.newFolder();
    new ClassPathResource("datavec-data-nlp/labeled/").copyDirectory(f);
    reader.initialize(conf, new FileSplit(f));

    Record record = reader.nextRecord();
    Record reread = reader.loadFromMetaData(record.getMetaData());

    // JUnit's assertEquals takes (expected, actual); the original had them
    // reversed on the size checks, producing misleading failure messages.
    assertEquals(2, record.getRecord().size());
    assertEquals(2, reread.getRecord().size());
    assertEquals(record.getRecord().get(0), reread.getRecord().get(0));
    assertEquals(record.getRecord().get(1), reread.getRecord().get(1));
    assertEquals(record.getMetaData(), reread.getMetaData());
}
 
Example 13
Source Project: deeplearning4j   Source File: MultipleEpochsIteratorTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testLoadFullDataSet() throws Exception {
    // Preloading a 50-example DataSet into MultipleEpochsIterator must replay
    // those same 50 examples exactly once per epoch.
    int epochs = 3;

    RecordReader csv = new CSVRecordReader();
    csv.initialize(new FileSplit(Resources.asFile("iris.txt")));
    DataSetIterator source = new RecordReaderDataSetIterator(csv, 150);
    DataSet ds = source.next(50);
    assertEquals(50, ds.getFeatures().size(0));

    MultipleEpochsIterator multiIter = new MultipleEpochsIterator(epochs, ds);
    assertTrue(multiIter.hasNext());

    int passes = 0;
    while (multiIter.hasNext()) {
        DataSet batch = multiIter.next();
        assertNotNull(batch);
        assertEquals(50, batch.numExamples(), 0);
        passes++;
    }
    assertEquals(epochs, passes);
    assertEquals(epochs, multiIter.epochs);
}
 
Example 14
Source Project: DataVec   Source File: SVMLightRecordWriterTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testNonIntegerButValidMultilabel() throws Exception {
    // A DoubleWritable holding an integral value (1.0) outside the feature
    // columns must be accepted in multilabel mode: the write must not throw.
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
            new IntWritable(2),
            new DoubleWritable(1.0));

    File tempFile = File.createTempFile("SVMLightRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    // Let the writer create the file itself.
    if (tempFile.exists())
        tempFile.delete();

    try (SVMLightRecordWriter writer = new SVMLightRecordWriter()) {
        Configuration config = new Configuration();
        config.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
        config.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 1);
        config.setBoolean(SVMLightRecordWriter.MULTILABEL, true);
        writer.initialize(config, new FileSplit(tempFile), new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example 15
Source Project: deeplearning4j   Source File: TfidfRecordReaderTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testRecordMetaData() throws Exception {
    // Every record produced by TfidfRecordReader must carry metadata with a
    // non-null source URI and the correct reader class.
    TfidfVectorizer vectorizer = new TfidfVectorizer();
    Configuration conf = new Configuration();
    conf.setInt(TfidfVectorizer.MIN_WORD_FREQUENCY, 1);
    conf.setBoolean(RecordReader.APPEND_LABEL, true);
    vectorizer.initialize(conf);
    TfidfRecordReader reader = new TfidfRecordReader();
    File f = testDir.newFolder();
    new ClassPathResource("datavec-data-nlp/labeled/").copyDirectory(f);
    reader.initialize(conf, new FileSplit(f));

    while (reader.hasNext()) {
        Record record = reader.nextRecord();
        assertNotNull(record.getMetaData().getURI());
        // JUnit's assertEquals takes (expected, actual); the original had the
        // arguments reversed, producing misleading failure messages.
        assertEquals(TfidfRecordReader.class, record.getMetaData().getReaderClass());
    }
}
 
Example 16
Source Project: deeplearning4j   Source File: DataSetIteratorTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testMnist() throws Exception {
    // The first 200 MNIST rows stored as CSV must match MnistDataSetIterator
    // batch-for-batch once CSV pixel values are divided by 255.
    ClassPathResource cpr = new ClassPathResource("mnist_first_200.txt");
    CSVRecordReader csv = new CSVRecordReader(0, ',');
    csv.initialize(new FileSplit(cpr.getTempFileFromArchive()));
    RecordReaderDataSetIterator expectedIter = new RecordReaderDataSetIterator(csv, 10, 0, 10);

    MnistDataSetIterator actualIter = new MnistDataSetIterator(10, 200, false, true, false, 0);

    while (expectedIter.hasNext()) {
        DataSet expected = expectedIter.next();
        DataSet actual = actualIter.next();

        INDArray fExp = expected.getFeatures();
        fExp.divi(255); // scale raw CSV pixels in place to [0, 1]
        INDArray lExp = expected.getLabels();

        // Cast before comparing so dtype differences don't cause spurious failures.
        assertEquals(fExp, actual.getFeatures().castTo(fExp.dataType()));
        assertEquals(lExp, actual.getLabels().castTo(lExp.dataType()));
    }
    assertFalse(actualIter.hasNext());
}
 
Example 17
Source Project: deeplearning4j   Source File: ConvolutionLayerSetupTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testLRN() throws Exception {
    // Builds an image pipeline over the lfwtest resources and checks that the
    // network configuration from incompleteLRN() infers nIn == 6 for the
    // layer at index 3 (presumably from the preceding layer's output depth —
    // see incompleteLRN() for the layer list).
    List<String> labels = new ArrayList<>(Arrays.asList("Zico", "Ziwang_Xu"));
    File dir = testDir.newFolder();
    new ClassPathResource("lfwtest/").copyDirectory(dir);
    String rootDir = dir.getAbsolutePath();

    RecordReader reader = new ImageRecordReader(28, 28, 3);
    reader.initialize(new FileSplit(new File(rootDir)));
    DataSetIterator recordReader = new RecordReaderDataSetIterator(reader, 10, 1, labels.size());
    // Removed labels.remove("lfwtest"): the list only ever contains
    // "Zico"/"Ziwang_Xu", so that call was a guaranteed no-op.

    NeuralNetConfiguration.ListBuilder builder = (NeuralNetConfiguration.ListBuilder) incompleteLRN();
    builder.setInputType(InputType.convolutional(28, 28, 3));

    MultiLayerConfiguration conf = builder.build();

    ConvolutionLayer layer2 = (ConvolutionLayer) conf.getConf(3).getLayer();
    assertEquals(6, layer2.getNIn());
}
 
Example 18
Source Project: DataVec   Source File: LibSvmRecordWriterTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testNonIntegerButValidMultilabel() throws Exception {
    // A DoubleWritable holding an integral value (1.0) outside the feature
    // columns must be accepted in multilabel mode: the write must not throw.
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
            new IntWritable(2),
            new DoubleWritable(1.0));

    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    // Let the writer create the file itself.
    if (tempFile.exists())
        tempFile.delete();

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        Configuration config = new Configuration();
        config.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
        config.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 1);
        config.setBoolean(LibSvmRecordWriter.MULTILABEL, true);
        writer.initialize(config, new FileSplit(tempFile), new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example 19
Source Project: DataVec   Source File: LibSvmRecordWriterTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test(expected = NumberFormatException.class)
public void nonIntegerMultilabel() throws Exception {
    // A label value of 1.2 has no integer representation, so multilabel
    // writing is expected to fail with NumberFormatException.
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
            new IntWritable(2),
            new DoubleWritable(1.2));

    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    // Let the writer create the file itself.
    if (tempFile.exists())
        tempFile.delete();

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        Configuration config = new Configuration();
        config.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
        config.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 1);
        config.setBoolean(LibSvmRecordWriter.MULTILABEL, true);
        writer.initialize(config, new FileSplit(tempFile), new NumberOfRecordsPartitioner());
        writer.write(record); // expected to throw here
    }
}
 
Example 20
Source Project: deeplearning4j   Source File: ExcelRecordWriterTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testWriter() throws Exception {
    // Round trip: write a batch of records to an .xlsx file, then read 10 rows
    // back with ExcelRecordReader.
    ExcelRecordWriter writer = new ExcelRecordWriter();
    val records = records();

    File tmpDir = testDir.newFolder();
    File outputFile = new File(tmpDir, "testexcel.xlsx");
    outputFile.deleteOnExit();
    FileSplit split = new FileSplit(outputFile);

    writer.initialize(split, new NumberOfRecordsPartitioner());
    writer.writeBatch(records.getRight());
    writer.close();
    assertEquals(1, outputFile.getParentFile().list().length); // exactly one output file

    ExcelRecordReader readerBack = new ExcelRecordReader();
    readerBack.initialize(split);
    List<List<Writable>> rows = readerBack.next(10);
    assertEquals(10, rows.size());
}
 
Example 21
Source Project: deeplearning4j   Source File: ExcelRecordReaderTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void testSimple() throws Exception {
    // Both a plain sheet and a sheet with one header row (skipped via the
    // ExcelRecordReader(1) constructor) must yield 3-column records.
    RecordReader excel = new ExcelRecordReader();
    excel.initialize(new FileSplit(new ClassPathResource("datavec-excel/testsheet.xlsx").getFile()));
    assertTrue(excel.hasNext());
    List<Writable> next = excel.next();
    assertEquals(3, next.size());

    RecordReader headerReader = new ExcelRecordReader(1);
    headerReader.initialize(new FileSplit(new ClassPathResource("datavec-excel/testsheetheader.xlsx").getFile()));
    // BUG FIX: the original asserted on `excel` here instead of `headerReader`,
    // so the header-skipping reader was never actually exercised.
    assertTrue(headerReader.hasNext());
    List<Writable> next2 = headerReader.next();
    assertEquals(3, next2.size());
}
 
Example 22
Source Project: DataVec   Source File: TransformProcessRecordReaderTests.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void simpleTransformTest() throws Exception {
    // Removing column "0" from a 5-column schema must leave 4 writables per
    // record across all 150 iris rows, both row-by-row and as a single batch.
    Schema schema = new Schema.Builder()
            .addColumnsDouble("%d", 0, 4)
            .build();
    TransformProcess removeFirstColumn = new TransformProcess.Builder(schema).removeColumns("0").build();

    CSVRecordReader csv = new CSVRecordReader();
    csv.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));
    TransformProcessRecordReader transformed =
                    new TransformProcessRecordReader(csv, removeFirstColumn);

    int rows = 0;
    List<List<Writable>> seen = new ArrayList<>();
    while (transformed.hasNext()) {
        List<Writable> row = transformed.next();
        assertEquals(4, row.size());
        rows++;
        seen.add(row);
    }
    assertEquals(150, rows);

    // Batch read after reset must reproduce the row-by-row pass exactly.
    assertTrue(transformed.resetSupported());
    transformed.reset();
    assertEquals(seen, transformed.next(150));
}
 
Example 23
@Test
public void testExcludeStringColCSV() throws Exception {
    // Build a 10-row CSV of "skip_i,i,i+0.5"; the iterator is wired to read
    // only column 1 as input and column 2 as output, skipping the string column.
    File csvFile = temporaryFolder.newFile();

    StringBuilder contents = new StringBuilder();
    for (int row = 1; row <= 10; row++) {
        if (row > 1) {
            contents.append("\n");
        }
        contents.append("skip_").append(row).append(",").append(row).append(",").append(row + 0.5);
    }
    FileUtils.writeStringToFile(csvFile, contents.toString());

    RecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(csvFile));

    RecordReaderMultiDataSetIterator rrmdsi = new RecordReaderMultiDataSetIterator.Builder(10)
            .addReader("rr", rr)
            .addInput("rr", 1, 1)     // integer column only
            .addOutput("rr", 2, 2)    // double column only
            .build();

    INDArray expFeatures = Nd4j.linspace(1, 10, 10).reshape(1, 10).transpose();
    INDArray expLabels = Nd4j.linspace(1, 10, 10).addi(0.5).reshape(1, 10).transpose();

    MultiDataSet mds = rrmdsi.next();
    assertFalse(rrmdsi.hasNext()); // all 10 rows consumed in one batch

    assertEquals(expFeatures, mds.getFeatures(0).castTo(expFeatures.dataType()));
    assertEquals(expLabels, mds.getLabels(0).castTo(expLabels.dataType()));
}
 
Example 24
Source Project: Java-Deep-Learning-Cookbook   Source File: FileSplitExample.java    License: MIT License 5 votes vote down vote up
public static void main(String[] args) {
    // Enumerate every .JPEG file under ./temp and print its location.
    // The trailing `true` makes the split recurse into subdirectories.
    String[] allowedFormats = {".JPEG"};
    FileSplit fileSplit = new FileSplit(new File("temp"), allowedFormats, true);
    fileSplit.locationsIterator().forEachRemaining(System.out::println);
}
 
Example 25
Source Project: Java-Deep-Learning-Cookbook   Source File: FileSplitExample.java    License: MIT License 5 votes vote down vote up
public static void main(String[] args) {
    // Print the location of each .JPEG file found under the "temp" directory,
    // descending into subdirectories (recursive flag = true).
    String[] extensions = new String[] {".JPEG"};
    FileSplit split = new FileSplit(new File("temp"), extensions, true);
    split.locationsIterator().forEachRemaining(location -> System.out.println(location));
}
 
Example 26
Source Project: deeplearning4j   Source File: SVMLightRecordReaderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test(expected = UnsupportedOperationException.class)
public void testInconsistentNumLabelsException() throws Exception {
    // With one-based indexing, iterating a file whose lines disagree on the
    // number of labels must trigger UnsupportedOperationException.
    SVMLightRecordReader reader = new SVMLightRecordReader();
    Configuration config = new Configuration();
    config.setBoolean(SVMLightRecordReader.ZERO_BASED_INDEXING, false);
    reader.initialize(config, new FileSplit(new ClassPathResource("datavec-api/svmlight/inconsistentNumLabels.txt").getFile()));
    while (reader.hasNext()) {
        reader.next(); // expected to throw at some point during iteration
    }
}
 
Example 27
@Test
public void testLineRecordReader() throws Exception {
    // Feeding each raw CSV line through LineRecordReaderFunction must produce
    // the same set of records as reading the file directly with CSVRecordReader.
    File dataFile = new ClassPathResource("iris.dat").getFile();
    List<String> lines = FileUtils.readLines(dataFile);

    // Removed the pointless `linesRdd = (lines)` and `outList = out` aliases —
    // leftovers from a Spark-based version of this test.
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    List<List<Writable>> out = lines.stream()
            .map(input -> new LineRecordReaderFunction(rr).apply(input))
            .collect(Collectors.toList());

    CSVRecordReader rr2 = new CSVRecordReader(0, ',');
    rr2.initialize(new FileSplit(dataFile));
    Set<List<Writable>> expectedSet = new HashSet<>();
    int totalCount = 0;
    while (rr2.hasNext()) {
        expectedSet.add(rr2.next());
        totalCount++;
    }

    assertEquals(totalCount, out.size());
    for (List<Writable> line : out) {
        assertTrue(expectedSet.contains(line));
    }
}
 
Example 28
Source Project: deeplearning4j   Source File: SVMLightRecordReaderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test(expected = NoSuchElementException.class)
public void testNoSuchElementException() throws Exception {
    // Exhaust the reader, then call next() one extra time: it must throw.
    SVMLightRecordReader reader = new SVMLightRecordReader();
    Configuration config = new Configuration();
    config.setInt(SVMLightRecordReader.NUM_FEATURES, 11);
    reader.initialize(config, new FileSplit(new ClassPathResource("datavec-api/svmlight/basic.txt").getFile()));
    while (reader.hasNext()) {
        reader.next();
    }
    reader.next(); // past the end — expected to throw
}
 
Example 29
Source Project: deeplearning4j   Source File: TestImageRecordReader.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testListenerInvocationBatch() throws IOException {
    // Requesting one record more than exists must still invoke the listener
    // only once per record actually read (numFiles times, not numFiles + 1).
    ParentPathLabelGenerator labelMaker = new ParentPathLabelGenerator();
    ImageRecordReader reader = new ImageRecordReader(32, 32, 3, labelMaker);

    File imagesDir = testDir.newFolder();
    new ClassPathResource("datavec-data-image/testimages/").copyDirectory(imagesDir);

    final int numFiles = 6;
    reader.initialize(new FileSplit(imagesDir));
    CountingListener counting = new CountingListener(new LogRecordListener());
    reader.setListeners(counting);
    reader.next(numFiles + 1);
    assertEquals(numFiles, counting.getCount());
}
 
Example 30
Source Project: deeplearning4j   Source File: RecordMapperTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testCopyFromCsvToArrow() throws Exception {
    val recordsPair = records();

    Path p = Files.createTempFile("csvwritetest", ".csv");
    FileUtils.write(p.toFile(),recordsPair.getFirst());
    p.toFile().deleteOnExit();


    CSVRecordReader recordReader = new CSVRecordReader();
    FileSplit fileSplit = new FileSplit(p.toFile());

    ArrowRecordWriter arrowRecordWriter = new ArrowRecordWriter(recordsPair.getMiddle());
    File outputFile = Files.createTempFile("outputarrow","arrow").toFile();
    FileSplit outputFileSplit = new FileSplit(outputFile);
    RecordMapper mapper = RecordMapper.builder().batchSize(10).inputUrl(fileSplit)
            .outputUrl(outputFileSplit).partitioner(new NumberOfRecordsPartitioner())
            .recordReader(recordReader).recordWriter(arrowRecordWriter)
            .build();
    mapper.copy();

    ArrowRecordReader arrowRecordReader = new ArrowRecordReader();
    arrowRecordReader.initialize(outputFileSplit);
    List<List<Writable>> next = arrowRecordReader.next(10);
    System.out.println(next);
    assertEquals(10,next.size());

}