Java Code Examples for org.datavec.api.records.reader.RecordReader#initialize()

The following examples show how to use org.datavec.api.records.reader.RecordReader#initialize(). You can vote up the examples you find useful or vote down the ones you don't, and you can go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: ExcelRecordReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
@Test
public void testSimple() throws Exception {
    // Default reader: no rows skipped; each row of the sheet has 3 columns
    RecordReader excel = new ExcelRecordReader();
    excel.initialize(new FileSplit(new ClassPathResource("testsheet.xlsx").getFile()));
    assertTrue(excel.hasNext());
    List<Writable> next = excel.next();
    assertEquals(3, next.size());

    // Reader configured to skip 1 header row
    RecordReader headerReader = new ExcelRecordReader(1);
    headerReader.initialize(new FileSplit(new ClassPathResource("testsheetheader.xlsx").getFile()));
    // Bug fix: the original asserted on 'excel' here, so the header-skipping
    // reader was never actually exercised.
    assertTrue(headerReader.hasNext());
    List<Writable> next2 = headerReader.next();
    assertEquals(3, next2.size());
}
 
Example 2
Source File: RecordReaderMultiDataSetIteratorTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testsBasicMeta() throws Exception {
    // Same pipeline as testBasic, but with metadata collection enabled so each
    // minibatch can be reconstructed from its RecordMetaData.
    RecordReader reader = new CSVRecordReader(0, ',');
    reader.initialize(new FileSplit(Resources.asFile("iris.txt")));

    RecordReaderMultiDataSetIterator iterator =
                    new RecordReaderMultiDataSetIterator.Builder(10)
                                    .addReader("reader", reader)
                                    .addInput("reader", 0, 3)
                                    .addOutputOneHot("reader", 4, 3)
                                    .build();
    iterator.setCollectMetaData(true);

    // Every minibatch reloaded from metadata must equal the original minibatch.
    int batches = 0;
    while (iterator.hasNext()) {
        MultiDataSet minibatch = iterator.next();
        MultiDataSet reloaded =
                        iterator.loadFromMetaData(minibatch.getExampleMetaData(RecordMetaData.class));
        assertEquals(minibatch, reloaded);
        batches++;
    }
    // Iris has 150 rows; with batch size 10 that is 15 minibatches.
    assertEquals(150 / 10, batches);
}
 
Example 3
Source File: HyperParameterTuningArbiterUiExample.java    From Java-Deep-Learning-Cookbook with MIT License 6 votes vote down vote up
/**
 * Reads Churn_Modelling.csv from the classpath and wraps it in a reader that
 * applies the preprocessing transform (drop identifier columns, encode
 * categoricals) on the fly.
 *
 * @return a RecordReader producing the transformed records
 * @throws IOException          if the CSV resource cannot be read
 * @throws InterruptedException if reader initialization is interrupted
 */
public RecordReader dataPreprocess() throws IOException, InterruptedException {
    // Raw CSV layout of the churn dataset
    final Schema schema = new Schema.Builder()
            .addColumnsString("RowNumber")
            .addColumnInteger("CustomerId")
            .addColumnString("Surname")
            .addColumnInteger("CreditScore")
            .addColumnCategorical("Geography", Arrays.asList("France","Spain","Germany"))
            .addColumnCategorical("Gender",Arrays.asList("Male","Female"))
            .addColumnsInteger("Age","Tenure","Balance","NumOfProducts","HasCrCard","IsActiveMember","EstimatedSalary","Exited")
            .build();

    // Drop identifiers, integer-encode Gender, one-hot Geography (dropping one
    // dummy column to avoid collinearity)
    final TransformProcess transform = new TransformProcess.Builder(schema)
            .removeColumns("RowNumber","Surname","CustomerId")
            .categoricalToInteger("Gender")
            .categoricalToOneHot("Geography")
            .removeColumns("Geography[France]")
            .build();

    // Skip the header row; comma-delimited
    final RecordReader csvReader = new CSVRecordReader(1,',');
    csvReader.initialize(new FileSplit(new ClassPathResource("Churn_Modelling.csv").getFile()));
    return new TransformProcessRecordReader(csvReader, transform);
}
 
Example 4
Source File: MultipleEpochsIteratorTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
@Test
public void testNextAndReset() throws Exception {
    // Wrap a single-pass iterator so it replays the data for several epochs.
    int epochs = 3;

    RecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(Resources.asFile("iris.txt")));
    // Batch size 150 == the full iris dataset, so each epoch yields one DataSet
    DataSetIterator iter = new RecordReaderDataSetIterator(rr, 150);
    MultipleEpochsIterator multiIter = new MultipleEpochsIterator(epochs, iter);

    assertTrue(multiIter.hasNext());
    while (multiIter.hasNext()) {
        DataSet path = multiIter.next();
        // Use the dedicated null assertion instead of assertFalse(path == null)
        // for a clearer failure message
        assertNotNull(path);
    }
    // The iterator should have completed exactly 'epochs' passes
    assertEquals(epochs, multiIter.epochs);
}
 
Example 5
Source File: LineReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
@Test
public void testLineReaderWithInputStreamInputSplit() throws Exception {
    // Work in a scratch directory under the system temp dir
    File tmpdir = new File(System.getProperty("java.io.tmpdir"), "tmpdir");
    tmpdir.mkdir();

    // Write nine numbered lines into a gzip-compressed file
    File tmp1 = new File(tmpdir, "tmp1.txt.gz");
    OutputStream os = new GZIPOutputStream(new FileOutputStream(tmp1, false));
    IOUtils.writeLines(Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8", "9"), null, os);
    os.flush();
    os.close();

    // Feed the decompressed stream to the line reader via an InputStreamInputSplit
    InputSplit split = new InputStreamInputSplit(new GZIPInputStream(new FileInputStream(tmp1)));
    RecordReader reader = new LineRecordReader();
    reader.initialize(split);

    // Each line should come back as a record containing a single writable
    int lineCount = 0;
    while (reader.hasNext()) {
        assertEquals(1, reader.next().size());
        lineCount++;
    }
    assertEquals(9, lineCount);

    // Best-effort cleanup; a failed delete should not fail the test
    try {
        FileUtils.deleteDirectory(tmpdir);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 6
Source File: ModelGenerator.java    From arabic-characters-recognition with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a DataSetIterator over a CSV file for classification training.
 *
 * @param csvFileClasspath path of the CSV file on disk
 * @param batchSize        number of examples per minibatch
 * @param labelIndex       column index holding the class label
 * @param numClasses       total number of label classes
 * @return an iterator producing (features, one-hot label) minibatches
 * @throws IOException          if the file cannot be read
 * @throws InterruptedException if reader initialization is interrupted
 */
private static DataSetIterator readCSVDataset(String csvFileClasspath, int batchSize, int labelIndex, int numClasses)
        throws IOException, InterruptedException {
    // Parameter names changed from SCREAMING_CASE to conventional lowerCamelCase;
    // Java parameter names are not visible to callers, so this is interface-safe.
    RecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(new File(csvFileClasspath)));
    return new RecordReaderDataSetIterator(rr, batchSize, labelIndex, numClasses);
}
 
Example 7
Source File: TestAnalyzeLocal.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Test
public void testAnalysisBasic() throws Exception {
    // Analyze iris locally, then verify the reported mean/stddev per column
    // against values computed directly from the raw data.
    RecordReader reader = new CSVRecordReader();
    reader.initialize(new FileSplit(new ClassPathResource("iris.txt").getFile()));

    Schema schema = new Schema.Builder()
            .addColumnsDouble("0", "1", "2", "3")
            .addColumnInteger("label")
            .build();

    DataAnalysis analysis = AnalyzeLocal.analyze(schema, reader);
    System.out.println(analysis);

    // Re-read every record for the reference computation
    List<List<Writable>> rows = new ArrayList<>();
    reader.reset();
    while (reader.hasNext()) {
        rows.add(reader.next());
    }

    INDArray matrix = RecordConverter.toMatrix(DataType.DOUBLE, rows);
    INDArray expectedMean = matrix.mean(0);
    INDArray expectedStd = matrix.std(0);

    // All 5 columns (4 features + label) must agree within tolerance
    for (int col = 0; col < 5; col++) {
        NumericalColumnAnalysis colAnalysis =
                        (NumericalColumnAnalysis) analysis.getColumnAnalysis().get(col);
        assertEquals(expectedMean.getDouble(col), colAnalysis.getMean(), 1e-3);
        assertEquals(expectedStd.getDouble(col), colAnalysis.getSampleStdev(), 1e-3);
    }
}
 
Example 8
Source File: RecordReaderDataSetiteratorTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Test
public void testRecordReaderMetaData() throws Exception {
    // Verify RecordMetaData round-trips: each example reloaded by its metadata
    // must reproduce the stored feature row, and a whole minibatch reloaded by
    // metadata must equal the original DataSet.
    RecordReader csv = new CSVRecordReader();
    csv.initialize(new FileSplit(Resources.asFile("iris.txt")));

    int batchSize = 10;
    int labelIdx = 4;
    int numClasses = 3;

    RecordReaderDataSetIterator rrdsi = new RecordReaderDataSetIterator(csv, batchSize, labelIdx, numClasses);
    rrdsi.setCollectMetaData(true);

    while (rrdsi.hasNext()) {
        DataSet ds = rrdsi.next();
        List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);

        // Per-example check against the reader's loadFromMetaData
        int rowIdx = 0;
        for (RecordMetaData m : meta) {
            Record record = csv.loadFromMetaData(m);
            INDArray featureRow = ds.getFeatures().getRow(rowIdx);
            for (int j = 0; j < 4; j++) {
                double expected = record.getRecord().get(j).toDouble();
                double actual = featureRow.getDouble(j);
                assertEquals("Failed on idx: " + j, expected, actual, 1e-6);
            }
            rowIdx++;
        }

        // Whole-minibatch check via the iterator
        DataSet fromMeta = rrdsi.loadFromMetaData(meta);
        assertEquals(ds, fromMeta);
    }
}
 
Example 9
Source File: JacksonRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
@Test
public void testReadingYaml() throws Exception {
    // Same content as the JSON test data, but serialized as YAML.
    // Turn ".../yaml_test_0.txt" into a numbered-file template ".../yaml_test_%d.txt".
    ClassPathResource cpr = new ClassPathResource("yaml/yaml_test_0.txt");
    String pathTemplate = cpr.getFile().getAbsolutePath().replace("0", "%d");

    RecordReader reader = new JacksonRecordReader(getFieldSelection(), new ObjectMapper(new YAMLFactory()));
    reader.initialize(new NumberedFileInputSplit(pathTemplate, 0, 2));

    testJacksonRecordReader(reader);
}
 
Example 10
Source File: RegexRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
@Test
public void testRegexLineRecordReaderMeta() throws Exception {
    // Groups: timestamp, numeric id, log level, message
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    // Skip the first line of the log file
    RecordReader reader = new RegexLineRecordReader(regex, 1);
    reader.initialize(new FileSplit(new ClassPathResource("/logtestdata/logtestfile0.txt").getFile()));

    // First pass: plain next() to collect the writables
    List<List<Writable>> firstPass = new ArrayList<>();
    while (reader.hasNext()) {
        firstPass.add(reader.next());
    }
    assertEquals(3, firstPass.size());

    // Second pass: nextRecord() so metadata is available
    List<Record> records = new ArrayList<>();
    List<List<Writable>> secondPass = new ArrayList<>();
    List<RecordMetaData> meta = new ArrayList<>();
    reader.reset();
    int expectedLine = 1; // line numbers start at 1 because one line was skipped
    while (reader.hasNext()) {
        Record record = reader.nextRecord();
        records.add(record);
        secondPass.add(record.getRecord());
        meta.add(record.getMetaData());
        assertEquals(expectedLine++, ((RecordMetaDataLine) record.getMetaData()).getLineNumber());
    }

    // Both passes must agree, and records reloaded purely from metadata must
    // equal the originals
    List<Record> fromMeta = reader.loadFromMetaData(meta);
    assertEquals(firstPass, secondPass);
    assertEquals(records, fromMeta);
}
 
Example 11
Source File: LFWLoader.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the LFW data (if needed) and returns an initialized image record reader
 * over either the train or test split.
 *
 * @param batchSize      minibatch size passed through to load()
 * @param numExamples    number of examples to load
 * @param imgDim         image dimensions as {height, width, channels}
 * @param numLabels      number of label classes
 * @param labelGenerator generator mapping file paths to labels
 * @param train          true for the train split (inputSplit[0]), false for test (inputSplit[1])
 * @param splitTrainTest fraction of data used for training
 * @param rng            random source for the split
 * @return an initialized ImageRecordReader over the requested split
 */
public RecordReader getRecordReader(int batchSize, int numExamples, int[] imgDim, int numLabels,
                PathLabelGenerator labelGenerator, boolean train, double splitTrainTest, Random rng) {
    load(batchSize, numExamples, numLabels, labelGenerator, splitTrainTest, rng);
    RecordReader recordReader =
                    new ImageRecordReader(imgDim[0], imgDim[1], imgDim[2], labelGenerator, imageTransform);

    try {
        InputSplit data = train ? inputSplit[0] : inputSplit[1];
        recordReader.initialize(data);
    } catch (InterruptedException e) {
        // Restore the interrupt flag before propagating
        Thread.currentThread().interrupt();
        throw new RuntimeException("Interrupted while initializing LFW record reader", e);
    } catch (IOException e) {
        // Previously swallowed (printStackTrace), which returned a reader that
        // was never initialized; fail fast instead.
        throw new RuntimeException("Failed to initialize LFW record reader", e);
    }
    return recordReader;
}
 
Example 12
Source File: JacksonLineRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Test
public void testReadJSON() throws Exception {
    // Line-delimited JSON: each line of the file is one record
    RecordReader reader = new JacksonLineRecordReader(getFieldSelection(), new ObjectMapper(new JsonFactory()));
    reader.initialize(new FileSplit(new ClassPathResource("datavec-api/json/json_test_3.txt").getFile()));

    testJacksonRecordReader(reader);
}
 
Example 13
Source File: MatlabInputFormat.java    From DataVec with Apache License 2.0 4 votes vote down vote up
@Override
public RecordReader createReader(InputSplit split) throws IOException, InterruptedException {
    // Construct a Matlab reader and initialize it directly with the given split
    final RecordReader matlabReader = new MatlabRecordReader();
    matlabReader.initialize(split);
    return matlabReader;
}
 
Example 14
Source File: WavInputFormat.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Override
public RecordReader createReader(InputSplit split) throws IOException, InterruptedException {
    // Build a WAV-file reader and hand it the split to initialize against
    final RecordReader reader = new WavFileRecordReader();
    reader.initialize(split);
    return reader;
}
 
Example 15
Source File: RecordReaderMultiDataSetIteratorTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testSplittingCSV() throws Exception {
    //Here's the idea: take Iris, and split it up into 2 inputs and 2 output arrays
    //Inputs: columns 0 and 1-2
    //Outputs: columns 3, and 4->OneHot
    //need to manually extract
    // Reference pipeline: the standard single-DataSet iterator over iris
    RecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new FileSplit(Resources.asFile("iris.txt")));
    RecordReaderDataSetIterator rrdsi = new RecordReaderDataSetIterator(rr, 10, 4, 3);

    // Pipeline under test: same data, but split into two inputs and two outputs
    RecordReader rr2 = new CSVRecordReader(0, ',');
    rr2.initialize(new FileSplit(Resources.asFile("iris.txt")));

    MultiDataSetIterator rrmdsi = new RecordReaderMultiDataSetIterator.Builder(10).addReader("reader", rr2)
                    .addInput("reader", 0, 0).addInput("reader", 1, 2).addOutput("reader", 3, 3)
                    .addOutputOneHot("reader", 4, 3).build();

    // Advance both iterators in lockstep and compare each minibatch
    while (rrdsi.hasNext()) {
        DataSet ds = rrdsi.next();
        INDArray fds = ds.getFeatures();
        INDArray lds = ds.getLabels();

        MultiDataSet mds = rrmdsi.next();
        // Expect exactly two feature arrays and two label arrays, with no masks
        assertEquals(2, mds.getFeatures().length);
        assertEquals(2, mds.getLabels().length);
        assertNull(mds.getFeaturesMaskArrays());
        assertNull(mds.getLabelsMaskArrays());
        INDArray[] fmds = mds.getFeatures();
        INDArray[] lmds = mds.getLabels();

        assertNotNull(fmds);
        assertNotNull(lmds);
        for (int i = 0; i < fmds.length; i++)
            assertNotNull(fmds[i]);
        for (int i = 0; i < lmds.length; i++)
            assertNotNull(lmds[i]);

        //Get the subsets of the original iris data
        // Column slices of the reference minibatch must match the multi arrays:
        // input1 = col 0, input2 = cols 1-2, output1 = col 3, output2 = one-hot label
        INDArray expIn1 = fds.get(all(), interval(0,0,true));
        INDArray expIn2 = fds.get(all(), interval(1, 2, true));
        INDArray expOut1 = fds.get(all(), interval(3,3,true));
        INDArray expOut2 = lds;

        assertEquals(expIn1, fmds[0]);
        assertEquals(expIn2, fmds[1]);
        assertEquals(expOut1, lmds[0]);
        assertEquals(expOut2, lmds[1]);
    }
    // Both iterators must be exhausted at the same time
    assertFalse(rrmdsi.hasNext());
}
 
Example 16
Source File: DataSetIteratorHelper.java    From Java-Deep-Learning-Cookbook with MIT License 4 votes vote down vote up
/**
 * Builds a RecordReader over the given CSV file (header row skipped) with the
 * project's transform process applied on top.
 *
 * @param file the CSV file to read
 * @return a transform-wrapped RecordReader over the file
 * @throws IOException          if the file cannot be read
 * @throws InterruptedException if reader initialization is interrupted
 */
public static RecordReader generateReader(File file) throws IOException, InterruptedException {
    // Skip the single header row; comma-delimited
    final RecordReader csvReader = new CSVRecordReader(1, ',');
    csvReader.initialize(new FileSplit(file));
    return applyTransform(csvReader, generateSchema());
}
 
Example 17
Source File: TextInputFormat.java    From DataVec with Apache License 2.0 4 votes vote down vote up
@Override
public RecordReader createReader(InputSplit split, Configuration conf) throws IOException, InterruptedException {
    // The TF-IDF reader takes the configuration in addition to the split
    final RecordReader tfidfReader = new TfidfRecordReader();
    tfidfReader.initialize(conf, split);
    return tfidfReader;
}
 
Example 18
Source File: RecordReaderDataSetiteratorTest.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
@Test
public void testCSVLoadingRegression() throws Exception {
    // Regression mode: the label column is read as a real value, not one-hot.
    int nLines = 30;
    int nFeatures = 5;
    int miniBatchSize = 10;
    int labelIdx = 0;

    // Generate a random CSV on disk and keep the raw values for comparison
    String path = "rr_csv_test_rand.csv";
    Pair<double[][],File> p = makeRandomCSV(path, nLines, nFeatures);
    double[][] data = p.getFirst();
    RecordReader testReader = new CSVRecordReader();
    testReader.initialize(new FileSplit(p.getSecond()));

    // regression=true; labelIdx is used as both the from- and to-column of the label
    DataSetIterator iter = new RecordReaderDataSetIterator(testReader, miniBatchSize, labelIdx, labelIdx, true);
    int miniBatch = 0;
    while (iter.hasNext()) {
        DataSet test = iter.next();
        INDArray features = test.getFeatures();
        INDArray labels = test.getLabels();
        // Shapes: [batch, nFeatures] features, [batch, 1] regression label
        assertArrayEquals(new long[] {miniBatchSize, nFeatures}, features.shape());
        assertArrayEquals(new long[] {miniBatchSize, 1}, labels.shape());

        int startRow = miniBatch * miniBatchSize;
        for (int i = 0; i < miniBatchSize; i++) {
            // Label must equal the raw CSV value of the label column for this row
            double labelExp = data[startRow + i][labelIdx];
            double labelAct = labels.getDouble(i);
            assertEquals(labelExp, labelAct, 1e-5f);

            // Features are all remaining columns in order, skipping the label column
            int featureCount = 0;
            for (int j = 0; j < nFeatures + 1; j++) {
                if (j == labelIdx)
                    continue;
                double featureExp = data[startRow + i][j];
                double featureAct = features.getDouble(i, featureCount++);
                assertEquals(featureExp, featureAct, 1e-5f);
            }
        }

        miniBatch++;
    }
    // All lines must have been consumed in full minibatches
    assertEquals(nLines / miniBatchSize, miniBatch);
}
 
Example 19
Source File: CustomerRetentionPredictionExample.java    From Java-Deep-Learning-Cookbook with MIT License 4 votes vote down vote up
/** Reads the CSV file (skipping its header row) and wraps it with the generated transform. */
private static RecordReader generateReader(File file) throws IOException, InterruptedException {
    // Header row skipped; comma-delimited
    final RecordReader csvReader = new CSVRecordReader(1, ',');
    csvReader.initialize(new FileSplit(file));
    return applyTransform(csvReader, generateSchema());
}
 
Example 20
Source File: JacksonRecordReaderTest.java    From DataVec with Apache License 2.0 4 votes vote down vote up
@Test
public void testAppendingLabels() throws Exception {
    // Build a numbered-file template from the "0" file: json_test_0.txt -> json_test_%d.txt
    ClassPathResource cpr = new ClassPathResource("json/json_test_0.txt");
    String path = cpr.getFile().getAbsolutePath().replace("0", "%d");

    InputSplit is = new NumberedFileInputSplit(path, 0, 2);

    //Insert at the end:
    // outputPosition = -1 appends the generated label after the selected fields
    RecordReader rr = new JacksonRecordReader(getFieldSelection(), new ObjectMapper(new JsonFactory()), false, -1,
                    new LabelGen());
    rr.initialize(is);

    // File 0: all fields present; label (file number 0) appended last
    List<Writable> exp0 = Arrays.asList((Writable) new Text("aValue0"), new Text("bValue0"), new Text("cxValue0"),
                    new IntWritable(0));
    assertEquals(exp0, rr.next());

    // File 1: field "b" missing, replaced by the configured default "MISSING_B"
    List<Writable> exp1 = Arrays.asList((Writable) new Text("aValue1"), new Text("MISSING_B"), new Text("cxValue1"),
                    new IntWritable(1));
    assertEquals(exp1, rr.next());

    // File 2: nested field "c.x" missing, replaced by "MISSING_CX"
    List<Writable> exp2 = Arrays.asList((Writable) new Text("aValue2"), new Text("bValue2"), new Text("MISSING_CX"),
                    new IntWritable(2));
    assertEquals(exp2, rr.next());

    //Insert at position 0:
    // Same reader but with the label placed before the selected fields
    rr = new JacksonRecordReader(getFieldSelection(), new ObjectMapper(new JsonFactory()), false, -1,
                    new LabelGen(), 0);
    rr.initialize(is);

    exp0 = Arrays.asList((Writable) new IntWritable(0), new Text("aValue0"), new Text("bValue0"),
                    new Text("cxValue0"));
    assertEquals(exp0, rr.next());

    exp1 = Arrays.asList((Writable) new IntWritable(1), new Text("aValue1"), new Text("MISSING_B"),
                    new Text("cxValue1"));
    assertEquals(exp1, rr.next());

    exp2 = Arrays.asList((Writable) new IntWritable(2), new Text("aValue2"), new Text("bValue2"),
                    new Text("MISSING_CX"));
    assertEquals(exp2, rr.next());
}