org.datavec.api.split.FileSplit Java Examples

The following examples show how to use org.datavec.api.split.FileSplit. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
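As a quick orientation before the examples, here is a minimal sketch of the FileSplit constructors that recur below: a single file, a directory filtered by extension, and a directory filtered and shuffled with a seeded Random. The paths are placeholders.

import java.io.File;
import java.util.Random;
import org.datavec.api.split.FileSplit;

// A single file, e.g. a CSV to read or an output file to write:
FileSplit single = new FileSplit(new File("data/iris.dat"));

// All files under a directory that match the allowed extensions:
String[] allowedFormats = {"jpg", "png"};
FileSplit filtered = new FileSplit(new File("data/images/"), allowedFormats);

// As above, but shuffled with a seeded Random for reproducible ordering:
FileSplit shuffled = new FileSplit(new File("data/images/"), allowedFormats, new Random(42));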
Example #1
Source File: SvhnDataFetcher.java    From deeplearning4j with Apache License 2.0
@Override
public RecordReader getRecordReader(long rngSeed, int[] imgDim, DataSetType set, ImageTransform imageTransform) {
    try {
        Random rng = new Random(rngSeed);
        File datasetPath = getDataSetPath(set);

        FileSplit data = new FileSplit(datasetPath, BaseImageLoader.ALLOWED_FORMATS, rng);
        ObjectDetectionRecordReader recordReader = new ObjectDetectionRecordReader(imgDim[1], imgDim[0], imgDim[2],
                        imgDim[4], imgDim[3], null);

        recordReader.initialize(data);
        return recordReader;
    } catch (IOException e) {
        throw new RuntimeException("Could not download SVHN", e);
    }
}
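Here the three-argument constructor both filters the dataset directory to BaseImageLoader.ALLOWED_FORMATS and shuffles the matching files with the seeded Random, so the record order is reproducible for a given rngSeed.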
 
Example #2
Source File: TfidfRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testReadRecordFromMetaData() throws Exception {
    TfidfVectorizer vectorizer = new TfidfVectorizer();
    Configuration conf = new Configuration();
    conf.setInt(TfidfVectorizer.MIN_WORD_FREQUENCY, 1);
    conf.setBoolean(RecordReader.APPEND_LABEL, true);
    vectorizer.initialize(conf);
    TfidfRecordReader reader = new TfidfRecordReader();
    File f = testDir.newFolder();
    new ClassPathResource("datavec-data-nlp/labeled/").copyDirectory(f);
    reader.initialize(conf, new FileSplit(f));

    Record record = reader.nextRecord();

    Record reread = reader.loadFromMetaData(record.getMetaData());

    assertEquals(record.getRecord().size(), 2);
    assertEquals(reread.getRecord().size(), 2);
    assertEquals(record.getRecord().get(0), reread.getRecord().get(0));
    assertEquals(record.getRecord().get(1), reread.getRecord().get(1));
    assertEquals(record.getMetaData(), reread.getMetaData());
}
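The metadata round trip is the point of this test: loadFromMetaData rebuilds a record purely from its RecordMetaData (source URI plus reader class), so the reread record must match the original writable for writable.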
 
Example #3
Source File: ExcelRecordWriterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testWriter() throws Exception  {
    ExcelRecordWriter excelRecordWriter = new ExcelRecordWriter();
    val records = records();
    File tmpDir = testDir.newFolder();
    File outputFile = new File(tmpDir,"testexcel.xlsx");
    outputFile.deleteOnExit();
    FileSplit fileSplit = new FileSplit(outputFile);
    excelRecordWriter.initialize(fileSplit,new NumberOfRecordsPartitioner());
    excelRecordWriter.writeBatch(records.getRight());
    excelRecordWriter.close();
    File parentFile = outputFile.getParentFile();
    assertEquals(1,parentFile.list().length);

    ExcelRecordReader excelRecordReader = new ExcelRecordReader();
    excelRecordReader.initialize(fileSplit);
    List<List<Writable>> next = excelRecordReader.next(10);
    assertEquals(10,next.size());

}
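Note that the same FileSplit instance serves first as the writer's output target and then as the reader's input, which is what makes the write-then-read round trip on testexcel.xlsx work.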
 
Example #4
Source File: ExcelRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testSimple() throws Exception {
    RecordReader excel = new ExcelRecordReader();
    excel.initialize(new FileSplit(new ClassPathResource("datavec-excel/testsheet.xlsx").getFile()));
    assertTrue(excel.hasNext());
    List<Writable> next = excel.next();
    assertEquals(3,next.size());

    RecordReader headerReader = new ExcelRecordReader(1);
    headerReader.initialize(new FileSplit(new ClassPathResource("datavec-excel/testsheetheader.xlsx").getFile()));
    assertTrue(headerReader.hasNext());
    List<Writable> next2 = headerReader.next();
    assertEquals(3,next2.size());
}
 
Example #5
Source File: LibSvmRecordWriterTest.java    From DataVec with Apache License 2.0
@Test(expected = NumberFormatException.class)
public void nonIntegerMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
                                            new IntWritable(2),
                                            new DoubleWritable(1.2));
    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 1);
        configWriter.setBoolean(LibSvmRecordWriter.MULTILABEL, true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
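This is the negative counterpart of Example #12 below: with MULTILABEL enabled, a label value of 1.2 cannot be interpreted as an integer label, so the writer is expected to fail with NumberFormatException, whereas the 1.0 in Example #12 passes.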
 
Example #6
Source File: TransformProcessRecordReaderTests.java    From DataVec with Apache License 2.0
@Test
public void simpleTransformTest() throws Exception {
    Schema schema = new Schema.Builder()
            .addColumnsDouble("%d", 0, 4)
            .build();
    TransformProcess transformProcess = new TransformProcess.Builder(schema).removeColumns("0").build();
    CSVRecordReader csvRecordReader = new CSVRecordReader();
    csvRecordReader.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));
    TransformProcessRecordReader rr =
                    new TransformProcessRecordReader(csvRecordReader, transformProcess);
    int count = 0;
    List<List<Writable>> all = new ArrayList<>();
    while(rr.hasNext()){
        List<Writable> next = rr.next();
        assertEquals(4, next.size());
        count++;
        all.add(next);
    }
    assertEquals(150, count);

    //Test batch:
    assertTrue(rr.resetSupported());
    rr.reset();
    List<List<Writable>> batch = rr.next(150);
    assertEquals(all, batch);
}
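The builder call addColumnsDouble("%d", 0, 4) expands to five double columns named "0" through "4"; after removeColumns("0") each transformed record holds four values, and iris.dat supplies 150 rows, which is exactly what the assertions check.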
 
Example #7
Source File: TfidfRecordReaderTest.java    From DataVec with Apache License 2.0
@Test
public void testReadRecordFromMetaData() throws Exception {
    TfidfVectorizer vectorizer = new TfidfVectorizer();
    Configuration conf = new Configuration();
    conf.setInt(TfidfVectorizer.MIN_WORD_FREQUENCY, 1);
    conf.setBoolean(RecordReader.APPEND_LABEL, true);
    vectorizer.initialize(conf);
    TfidfRecordReader reader = new TfidfRecordReader();
    reader.initialize(conf, new FileSplit(new ClassPathResource("labeled").getFile()));

    Record record = reader.nextRecord();

    Record reread = reader.loadFromMetaData(record.getMetaData());

    assertEquals(record.getRecord().size(), 2);
    assertEquals(reread.getRecord().size(), 2);
    assertEquals(record.getRecord().get(0), reread.getRecord().get(0));
    assertEquals(record.getRecord().get(1), reread.getRecord().get(1));
    assertEquals(record.getMetaData(), reread.getMetaData());
}
 
Example #8
Source File: TransformProcessRecordReaderTests.java    From deeplearning4j with Apache License 2.0
@Test
public void simpleTransformTest() throws Exception {
    Schema schema = new Schema.Builder()
            .addColumnsDouble("%d", 0, 4)
            .build();
    TransformProcess transformProcess = new TransformProcess.Builder(schema).removeColumns("0").build();
    CSVRecordReader csvRecordReader = new CSVRecordReader();
    csvRecordReader.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));
    TransformProcessRecordReader rr =
                    new TransformProcessRecordReader(csvRecordReader, transformProcess);
    int count = 0;
    List<List<Writable>> all = new ArrayList<>();
    while(rr.hasNext()){
        List<Writable> next = rr.next();
        assertEquals(4, next.size());
        count++;
        all.add(next);
    }
    assertEquals(150, count);

    //Test batch:
    assertTrue(rr.resetSupported());
    rr.reset();
    List<List<Writable>> batch = rr.next(150);
    assertEquals(all, batch);
}
 
Example #9
Source File: FileRecordReaderTest.java    From DataVec with Apache License 2.0
@Test
public void testReset() throws Exception {
    FileRecordReader rr = new FileRecordReader();
    rr.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    int nResets = 5;
    for (int i = 0; i < nResets; i++) {

        int lineCount = 0;
        while (rr.hasNext()) {
            List<Writable> line = rr.next();
            assertEquals(1, line.size());
            lineCount++;
        }
        assertFalse(rr.hasNext());
        assertEquals(1, lineCount);
        rr.reset();
    }
}
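FileRecordReader emits one record per file rather than per line, so the FileSplit over the single iris.dat file yields exactly one record per pass (hence the lineCount of 1), and reset() restores the reader for the next pass.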
 
Example #10
Source File: CodecReaderTest.java    From DataVec with Apache License 2.0
@Test
public void testCodecReaderMeta() throws Exception {
    File file = new ClassPathResource("fire_lowres.mp4").getFile();
    SequenceRecordReader reader = new CodecRecordReader();
    Configuration conf = new Configuration();
    conf.set(CodecRecordReader.RAVEL, "true");
    conf.set(CodecRecordReader.START_FRAME, "160");
    conf.set(CodecRecordReader.TOTAL_FRAMES, "500");
    conf.set(CodecRecordReader.ROWS, "80");
    conf.set(CodecRecordReader.COLUMNS, "46");
    reader.initialize(new FileSplit(file));
    reader.setConf(conf);
    assertTrue(reader.hasNext());
    List<List<Writable>> record = reader.sequenceRecord();
    assertEquals(500, record.size()); //500 frames

    reader.reset();
    SequenceRecord seqR = reader.nextSequence();
    assertEquals(record, seqR.getSequenceRecord());
    RecordMetaData meta = seqR.getMetaData();
    //        System.out.println(meta);
    assertTrue(meta.getURI().toString().endsWith("fire_lowres.mp4"));

    SequenceRecord fromMeta = reader.loadSequenceFromMetaData(meta);
    assertEquals(seqR, fromMeta);
}
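The string-valued configuration keys select a 500-frame window starting at frame 160, and RAVEL flattens each 80x46 frame into one row of writables, which is why the sequence record has exactly 500 steps.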
 
Example #11
Source File: CodecReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testCodecReaderMeta() throws Exception {
    File file = new ClassPathResource("datavec-data-codec/fire_lowres.mp4").getFile();
    SequenceRecordReader reader = new CodecRecordReader();
    Configuration conf = new Configuration();
    conf.set(CodecRecordReader.RAVEL, "true");
    conf.set(CodecRecordReader.START_FRAME, "160");
    conf.set(CodecRecordReader.TOTAL_FRAMES, "500");
    conf.set(CodecRecordReader.ROWS, "80");
    conf.set(CodecRecordReader.COLUMNS, "46");
    reader.initialize(new FileSplit(file));
    reader.setConf(conf);
    assertTrue(reader.hasNext());
    List<List<Writable>> record = reader.sequenceRecord();
    assertEquals(500, record.size()); //500 frames

    reader.reset();
    SequenceRecord seqR = reader.nextSequence();
    assertEquals(record, seqR.getSequenceRecord());
    RecordMetaData meta = seqR.getMetaData();
    //        System.out.println(meta);
    assertTrue(meta.getURI().toString().endsWith(file.getName()));

    SequenceRecord fromMeta = reader.loadSequenceFromMetaData(meta);
    assertEquals(seqR, fromMeta);
}
 
Example #12
Source File: LibSvmRecordWriterTest.java    From DataVec with Apache License 2.0
@Test
public void testNonIntegerButValidMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
            new IntWritable(2),
            new DoubleWritable(1.0));
    File tempFile = File.createTempFile("LibSvmRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (LibSvmRecordWriter writer = new LibSvmRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(LibSvmRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(LibSvmRecordWriter.FEATURE_LAST_COLUMN, 1);
        configWriter.setBoolean(LibSvmRecordWriter.MULTILABEL, true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #13
Source File: HyperParameterTuning.java    From Java-Deep-Learning-Cookbook with MIT License
public RecordReader dataPreprocess() throws IOException, InterruptedException {
    //Schema Definitions
    Schema schema = new Schema.Builder()
            .addColumnsString("RowNumber")
            .addColumnInteger("CustomerId")
            .addColumnString("Surname")
            .addColumnInteger("CreditScore")
            .addColumnCategorical("Geography",Arrays.asList("France","Spain","Germany"))
            .addColumnCategorical("Gender",Arrays.asList("Male","Female"))
            .addColumnsInteger("Age","Tenure","Balance","NumOfProducts","HasCrCard","IsActiveMember","EstimatedSalary","Exited").build();

    //Schema Transformation
    TransformProcess transformProcess = new TransformProcess.Builder(schema)
            .removeColumns("RowNumber","Surname","CustomerId")
            .categoricalToInteger("Gender")
            .categoricalToOneHot("Geography")
            .removeColumns("Geography[France]")
            .build();

    //CSVReader - Reading from file and applying transformation
    RecordReader reader = new CSVRecordReader(1,',');
    reader.initialize(new FileSplit(new ClassPathResource("Churn_Modelling.csv").getFile()));
    RecordReader transformProcessRecordReader = new TransformProcessRecordReader(reader,transformProcess);
    return transformProcessRecordReader;
}
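A hedged sketch of how such a reader is typically consumed downstream; the batch size, label index, and class count below are illustrative assumptions (they presume the "Exited" label lands at index 11 of the 12 post-transform columns), not values taken from the project above.

// Hypothetical usage sketch, not from the source project.
RecordReader reader = dataPreprocess();
// 12 columns remain after the transform; assuming "Exited" is last,
// the label index is 11 and there are two classes. Batch size 32 is arbitrary.
DataSetIterator iterator = new RecordReaderDataSetIterator(reader, 32, 11, 2);
while (iterator.hasNext()) {
    DataSet batch = iterator.next(); // features plus one-hot labels, ready for fit()
}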
 
Example #14
Source File: ConvolutionLayerSetupTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testLRN() throws Exception {
    List<String> labels = new ArrayList<>(Arrays.asList("Zico", "Ziwang_Xu"));
    File dir = testDir.newFolder();
    new ClassPathResource("lfwtest/").copyDirectory(dir);
    String rootDir = dir.getAbsolutePath();

    RecordReader reader = new ImageRecordReader(28, 28, 3);
    reader.initialize(new FileSplit(new File(rootDir)));
    DataSetIterator recordReader = new RecordReaderDataSetIterator(reader, 10, 1, labels.size());
    labels.remove("lfwtest");
    NeuralNetConfiguration.ListBuilder builder = (NeuralNetConfiguration.ListBuilder) incompleteLRN();
    builder.setInputType(InputType.convolutional(28, 28, 3));

    MultiLayerConfiguration conf = builder.build();

    ConvolutionLayer layer2 = (ConvolutionLayer) conf.getConf(3).getLayer();
    assertEquals(6, layer2.getNIn());

}
 
Example #15
Source File: LFWLoader.java    From DataVec with Apache License 2.0
public void load(int batchSize, int numExamples, int numLabels, PathLabelGenerator labelGenerator,
                double splitTrainTest, Random rng) {
    if (!imageFilesExist()) {
        if (!fullDir.exists() || fullDir.listFiles() == null || fullDir.listFiles().length == 0) {
            fullDir.mkdir();

            if (useSubset) {
                log.info("Downloading {} subset...", localDir);
                downloadAndUntar(lfwSubsetData, fullDir);
            } else {
                log.info("Downloading {}...", localDir);
                downloadAndUntar(lfwData, fullDir);
                downloadAndUntar(lfwLabel, fullDir);
            }
        }
    }
    FileSplit fileSplit = new FileSplit(fullDir, ALLOWED_FORMATS, rng);
    BalancedPathFilter pathFilter = new BalancedPathFilter(rng, ALLOWED_FORMATS, labelGenerator, numExamples,
                    numLabels, 0, batchSize, null);
    inputSplit = fileSplit.sample(pathFilter, numExamples * splitTrainTest, numExamples * (1 - splitTrainTest));
}
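For reference, sample(...) returns one InputSplit per weight argument after the BalancedPathFilter has evened out the classes; a minimal hedged sketch with illustrative 80/20 weights:

InputSplit[] splits = fileSplit.sample(pathFilter, 80, 20); // weights are illustrative
InputSplit trainSplit = splits[0];
InputSplit testSplit = splits[1];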
 
Example #16
Source File: MultipleEpochsIteratorTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testNextAndReset() throws Exception {
    int epochs = 3;

    RecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(Resources.asFile("iris.txt")));
    DataSetIterator iter = new RecordReaderDataSetIterator(rr, 150);
    MultipleEpochsIterator multiIter = new MultipleEpochsIterator(epochs, iter);

    assertTrue(multiIter.hasNext());
    while (multiIter.hasNext()) {
        DataSet path = multiIter.next();
        assertNotNull(path);
    }
    assertEquals(epochs, multiIter.epochs);
}
 
Example #17
Source File: MultipleEpochsIteratorTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testLoadFullDataSet() throws Exception {
    int epochs = 3;

    RecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(Resources.asFile("iris.txt")));
    DataSetIterator iter = new RecordReaderDataSetIterator(rr, 150);
    DataSet ds = iter.next(50);

    assertEquals(50, ds.getFeatures().size(0));

    MultipleEpochsIterator multiIter = new MultipleEpochsIterator(epochs, ds);

    assertTrue(multiIter.hasNext());
    int count = 0;
    while (multiIter.hasNext()) {
        DataSet path = multiIter.next();
        assertNotNull(path);
        assertEquals(50, path.numExamples(), 0);
        count++;
    }
    assertEquals(epochs, count);
    assertEquals(epochs, multiIter.epochs);
}
 
Example #18
Source File: CSVRecordReaderTest.java    From DataVec with Apache License 2.0
@Test(expected = NoSuchElementException.class)
public void testCsvSkipAllLines() throws IOException, InterruptedException {
    final int numLines = 4;
    final List<Writable> lineList = Arrays.asList((Writable) new IntWritable(numLines - 1),
                    (Writable) new Text("one"), (Writable) new Text("two"), (Writable) new Text("three"));
    String header = ",one,two,three";
    List<String> lines = new ArrayList<>();
    for (int i = 0; i < numLines; i++)
        lines.add(Integer.toString(i) + header);
    File tempFile = File.createTempFile("csvSkipLines", ".csv");
    FileUtils.writeLines(tempFile, lines);

    CSVRecordReader rr = new CSVRecordReader(numLines, ',');
    rr.initialize(new FileSplit(tempFile));
    rr.reset();
    assertFalse(rr.hasNext());
    rr.next();
}
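The first constructor argument of CSVRecordReader is the number of lines to skip, so skipping all numLines lines leaves nothing to read, and the next() call after reset() is expected to throw NoSuchElementException.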
 
Example #19
Source File: SVMLightRecordWriterTest.java    From DataVec with Apache License 2.0
@Test
public void testNonIntegerButValidMultilabel() throws Exception {
    List<Writable> record = Arrays.asList((Writable) new IntWritable(3),
            new IntWritable(2),
            new DoubleWritable(1.0));
    File tempFile = File.createTempFile("SVMLightRecordWriter", ".txt");
    tempFile.setWritable(true);
    tempFile.deleteOnExit();
    if (tempFile.exists())
        tempFile.delete();

    try (SVMLightRecordWriter writer = new SVMLightRecordWriter()) {
        Configuration configWriter = new Configuration();
        configWriter.setInt(SVMLightRecordWriter.FEATURE_FIRST_COLUMN, 0);
        configWriter.setInt(SVMLightRecordWriter.FEATURE_LAST_COLUMN, 1);
        configWriter.setBoolean(SVMLightRecordWriter.MULTILABEL, true);
        FileSplit outputSplit = new FileSplit(tempFile);
        writer.initialize(configWriter,outputSplit,new NumberOfRecordsPartitioner());
        writer.write(record);
    }
}
 
Example #20
Source File: TfidfRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testRecordMetaData() throws Exception {
    TfidfVectorizer vectorizer = new TfidfVectorizer();
    Configuration conf = new Configuration();
    conf.setInt(TfidfVectorizer.MIN_WORD_FREQUENCY, 1);
    conf.setBoolean(RecordReader.APPEND_LABEL, true);
    vectorizer.initialize(conf);
    TfidfRecordReader reader = new TfidfRecordReader();
    File f = testDir.newFolder();
    new ClassPathResource("datavec-data-nlp/labeled/").copyDirectory(f);
    reader.initialize(conf, new FileSplit(f));

    while (reader.hasNext()) {
        Record record = reader.nextRecord();
        assertNotNull(record.getMetaData().getURI());
        assertEquals(record.getMetaData().getReaderClass(), TfidfRecordReader.class);
    }
}
 
Example #21
Source File: ArrowBinaryInputAdapterTest.java    From konduit-serving with Apache License 2.0
@Test(timeout = 60000)
public void testArrowBinary() throws Exception {
    Schema irisInputSchema = TrainUtils.getIrisInputSchema();
    ArrowRecordWriter arrowRecordWriter = new ArrowRecordWriter(irisInputSchema);
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new FileSplit(new ClassPathResource("iris.txt").getFile()));
    List<List<Writable>> writables = reader.next(150);

    File tmpFile = new File(temporary.getRoot(), "tmp.arrow");
    FileSplit fileSplit = new FileSplit(tmpFile);
    arrowRecordWriter.initialize(fileSplit, new NumberOfRecordsPartitioner());
    arrowRecordWriter.writeBatch(writables);
    byte[] arrowBytes = FileUtils.readFileToByteArray(tmpFile);

    Buffer buffer = Buffer.buffer(arrowBytes);
    ArrowBinaryInputAdapter arrowBinaryInputAdapter = new ArrowBinaryInputAdapter();
    ArrowWritableRecordBatch convert = arrowBinaryInputAdapter.convert(buffer, ConverterArgs.builder().schema(irisInputSchema).build(), null);
    assertEquals(writables.size(), convert.size());
}
 
Example #22
Source File: DataSetIteratorTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testMnist() throws Exception {
    ClassPathResource cpr = new ClassPathResource("mnist_first_200.txt");
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new FileSplit(cpr.getTempFileFromArchive()));
    RecordReaderDataSetIterator dsi = new RecordReaderDataSetIterator(rr, 10, 0, 10);

    MnistDataSetIterator iter = new MnistDataSetIterator(10, 200, false, true, false, 0);

    while (dsi.hasNext()) {
        DataSet dsExp = dsi.next();
        DataSet dsAct = iter.next();

        INDArray fExp = dsExp.getFeatures();
        fExp.divi(255);
        INDArray lExp = dsExp.getLabels();

        INDArray fAct = dsAct.getFeatures();
        INDArray lAct = dsAct.getLabels();

        assertEquals(fExp, fAct.castTo(fExp.dataType()));
        assertEquals(lExp, lAct.castTo(lExp.dataType()));
    }
    assertFalse(iter.hasNext());
}
 
Example #23
Source File: ArrowRecordReader.java    From DataVec with Apache License 2.0
@Override
public Record loadFromMetaData(RecordMetaData recordMetaData) {
    if(!(recordMetaData instanceof RecordMetaDataIndex)) {
        throw new IllegalArgumentException("Unable to load from meta data. No index specified for record");
    }

    RecordMetaDataIndex index = (RecordMetaDataIndex) recordMetaData;
    InputSplit fileSplit = new FileSplit(new File(index.getURI()));
    initialize(fileSplit);
    this.currIdx = (int) index.getIndex();
    return nextRecord();
}
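This override gives Arrow records random access by metadata: the reader is re-initialized against the file named in the metadata's URI, then seeks to the stored index before returning the next record.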
 
Example #24
Source File: ArrowRecordReader.java    From DataVec with Apache License 2.0
@Override
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) {
    Map<String,List<RecordMetaData>> metaDataByUri = new HashMap<>();
    //gather all unique locations for the metadata
    //this will prevent initialization multiple times of the record
    for(RecordMetaData recordMetaData : recordMetaDatas) {
        if(!(recordMetaData instanceof RecordMetaDataIndex)) {
            throw new IllegalArgumentException("Unable to load from meta data. No index specified for record");
        }

        List<RecordMetaData> recordMetaData1 = metaDataByUri.get(recordMetaData.getURI().toString());
        if(recordMetaData1 == null) {
            recordMetaData1 = new ArrayList<>();
            metaDataByUri.put(recordMetaData.getURI().toString(),recordMetaData1);
        }

        recordMetaData1.add(recordMetaData);

    }

    List<Record> ret = new ArrayList<>();
    for(String uri : metaDataByUri.keySet()) {
        List<RecordMetaData> metaData = metaDataByUri.get(uri);
        InputSplit fileSplit = new FileSplit(new File(URI.create(uri)));
        initialize(fileSplit);
        for(RecordMetaData index : metaData) {
            RecordMetaDataIndex index2 = (RecordMetaDataIndex) index;
            this.currIdx = (int) index2.getIndex();
            ret.add(nextRecord());
        }

    }

    return ret;
}
 
Example #25
Source File: ArrowConverterTest.java    From DataVec with Apache License 2.0
@Test
public void testCreateNDArray() throws Exception {
    val recordsToWrite = recordToWrite();
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    ArrowConverter.writeRecordBatchTo(recordsToWrite.getRight(),recordsToWrite.getFirst(),byteArrayOutputStream);

    File tmpFile = new File("tmp-arrow-file-" + UUID.randomUUID().toString() + ".arrorw");
    FileOutputStream outputStream = new FileOutputStream(tmpFile);
    tmpFile.deleteOnExit();
    ArrowConverter.writeRecordBatchTo(recordsToWrite.getRight(),recordsToWrite.getFirst(),outputStream);
    outputStream.flush();
    outputStream.close();

    Pair<Schema, ArrowWritableRecordBatch> schemaArrowWritableRecordBatchPair = ArrowConverter.readFromFile(tmpFile);
    assertEquals(recordsToWrite.getFirst(),schemaArrowWritableRecordBatchPair.getFirst());
    assertEquals(recordsToWrite.getRight(),schemaArrowWritableRecordBatchPair.getRight().toArrayList());

    byte[] arr = byteArrayOutputStream.toByteArray();
    val read = ArrowConverter.readFromBytes(arr);
    assertEquals(recordsToWrite,read);

    //send file
    File tmp =  tmpDataFile(recordsToWrite);
    ArrowRecordReader recordReader = new ArrowRecordReader();

    recordReader.initialize(new FileSplit(tmp));

    recordReader.next();
    ArrowWritableRecordBatch currentBatch = recordReader.getCurrentBatch();
    INDArray arr2 = ArrowConverter.toArray(currentBatch);
    assertEquals(2,arr2.rows());
    assertEquals(2,arr2.columns());
}
 
Example #26
Source File: SVMLightRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test(expected = IndexOutOfBoundsException.class)
public void testZeroIndexFeatureWithoutUsingZeroIndexing() throws Exception {
    SVMLightRecordReader rr = new SVMLightRecordReader();
    Configuration config = new Configuration();
    config.setBoolean(SVMLightRecordReader.ZERO_BASED_INDEXING, false);
    config.setInt(SVMLightRecordReader.NUM_FEATURES, 10);
    rr.initialize(config, new FileSplit(new ClassPathResource("datavec-api/svmlight/zeroIndexFeature.txt").getFile()));
    rr.next();
}
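With ZERO_BASED_INDEXING disabled the reader treats feature indices as one-based, so a feature written at index 0 presumably maps to array position -1, producing the expected IndexOutOfBoundsException.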
 
Example #27
Source File: LibSvmRecordReaderTest.java    From DataVec with Apache License 2.0
@Test(expected = UnsupportedOperationException.class)
public void testInconsistentNumLabelsException() throws Exception {
    LibSvmRecordReader rr = new LibSvmRecordReader();
    Configuration config = new Configuration();
    config.setBoolean(LibSvmRecordReader.ZERO_BASED_INDEXING, false);
    rr.initialize(config, new FileSplit(new ClassPathResource("svmlight/inconsistentNumLabels.txt").getFile()));
    while (rr.hasNext())
        rr.next();
}
 
Example #28
Source File: CodecReaderTest.java    From DataVec with Apache License 2.0
@Ignore
@Test
public void testNativeCodecReaderMeta() throws Exception {
    File file = new ClassPathResource("fire_lowres.mp4").getFile();
    SequenceRecordReader reader = new NativeCodecRecordReader();
    Configuration conf = new Configuration();
    conf.set(CodecRecordReader.RAVEL, "true");
    conf.set(CodecRecordReader.START_FRAME, "160");
    conf.set(CodecRecordReader.TOTAL_FRAMES, "500");
    conf.set(CodecRecordReader.ROWS, "80");
    conf.set(CodecRecordReader.COLUMNS, "46");
    reader.initialize(new FileSplit(file));
    reader.setConf(conf);
    assertTrue(reader.hasNext());
    List<List<Writable>> record = reader.sequenceRecord();
    assertEquals(500, record.size()); //500 frames

    reader.reset();
    SequenceRecord seqR = reader.nextSequence();
    assertEquals(record, seqR.getSequenceRecord());
    RecordMetaData meta = seqR.getMetaData();
    //        System.out.println(meta);
    assertTrue(meta.getURI().toString().endsWith("fire_lowres.mp4"));

    SequenceRecord fromMeta = reader.loadSequenceFromMetaData(meta);
    assertEquals(seqR, fromMeta);
}
 
Example #29
Source File: CodecReaderTest.java    From DataVec with Apache License 2.0
@Ignore
@Test
public void testNativeViaDataInputStream() throws Exception {

    File file = new ClassPathResource("fire_lowres.mp4").getFile();
    SequenceRecordReader reader = new NativeCodecRecordReader();
    Configuration conf = new Configuration();
    conf.set(CodecRecordReader.RAVEL, "true");
    conf.set(CodecRecordReader.START_FRAME, "160");
    conf.set(CodecRecordReader.TOTAL_FRAMES, "500");
    conf.set(CodecRecordReader.ROWS, "80");
    conf.set(CodecRecordReader.COLUMNS, "46");

    Configuration conf2 = new Configuration(conf);

    reader.initialize(new FileSplit(file));
    reader.setConf(conf);
    assertTrue(reader.hasNext());
    List<List<Writable>> expected = reader.sequenceRecord();


    SequenceRecordReader reader2 = new NativeCodecRecordReader();
    reader2.setConf(conf2);

    DataInputStream dataInputStream = new DataInputStream(new FileInputStream(file));
    List<List<Writable>> actual = reader2.sequenceRecord(null, dataInputStream);

    assertEquals(expected, actual);
}
 
Example #30
Source File: TestImageRecordReader.java    From deeplearning4j with Apache License 2.0
@Test
public void testListenerInvocationSingle() throws IOException {
    ParentPathLabelGenerator labelMaker = new ParentPathLabelGenerator();
    ImageRecordReader rr = new ImageRecordReader(32, 32, 3, labelMaker);
    File parent = testDir.newFolder();
    new ClassPathResource("datavec-data-image/testimages/class0/").copyDirectory(parent);
    int numFiles = parent.list().length;
    rr.initialize(new FileSplit(parent));
    CountingListener counting = new CountingListener(new LogRecordListener());
    rr.setListeners(counting);
    while(rr.hasNext()) {
        rr.next();
    }
    assertEquals(numFiles, counting.getCount());
}