org.datavec.api.records.reader.RecordReader Java Examples

The following examples show how to use org.datavec.api.records.reader.RecordReader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ExcelRecordReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@link ExcelRecordReader} yields a three-column first record both for a
 * plain sheet and for a sheet read through a reader constructed with the argument 1.
 */
@Test
public void testSimple() throws Exception {
    // Plain sheet, default reader.
    RecordReader excel = new ExcelRecordReader();
    excel.initialize(new FileSplit(new ClassPathResource("testsheet.xlsx").getFile()));
    assertTrue(excel.hasNext());
    List<Writable> next = excel.next();
    assertEquals(3, next.size());

    // NOTE(review): the constructor argument is presumably the number of leading
    // (header) rows to skip - confirm against ExcelRecordReader.
    RecordReader headerReader = new ExcelRecordReader(1);
    headerReader.initialize(new FileSplit(new ClassPathResource("testsheetheader.xlsx").getFile()));
    // Assert on headerReader itself; asserting on 'excel' here would leave the
    // header sheet completely unverified.
    assertTrue(headerReader.hasNext());
    List<Writable> next2 = headerReader.next();
    assertEquals(3, next2.size());
}
 
Example #2
Source File: RegexRecordReaderTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a log file through {@link RegexLineRecordReader} and verifies that three records,
 * each holding the four regex capture groups as {@link Text} values, are returned in order,
 * both on the first pass and again after {@code reset()}.
 */
@Test
public void testRegexLineRecordReader() throws Exception {
    // Four capture groups: timestamp, digits, upper-case word, remainder of the line.
    final String pattern = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    final RecordReader reader = new RegexLineRecordReader(pattern, 1);
    reader.initialize(new FileSplit(new ClassPathResource("datavec-api/logtestdata/logtestfile0.txt").getFile()));

    final List<Writable> firstExpected = Arrays.<Writable>asList(
                    new Text("2016-01-01 23:59:59.001"), new Text("1"),
                    new Text("DEBUG"), new Text("First entry message!"));
    final List<Writable> secondExpected = Arrays.<Writable>asList(
                    new Text("2016-01-01 23:59:59.002"), new Text("2"),
                    new Text("INFO"), new Text("Second entry message!"));
    final List<Writable> thirdExpected = Arrays.<Writable>asList(
                    new Text("2016-01-01 23:59:59.003"), new Text("3"),
                    new Text("WARN"), new Text("Third entry message!"));

    // First pass over the file.
    assertEquals(firstExpected, reader.next());
    assertEquals(secondExpected, reader.next());
    assertEquals(thirdExpected, reader.next());
    assertFalse(reader.hasNext());

    // After reset() the same three records must be produced again.
    reader.reset();
    assertEquals(firstExpected, reader.next());
    assertEquals(secondExpected, reader.next());
    assertEquals(thirdExpected, reader.next());
    assertFalse(reader.hasNext());
}
 
Example #3
Source File: RecordReaderDataSetiteratorTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds a single record that mixes scalar and NDArray writables into a
 * {@link RecordReaderDataSetIterator} and checks the resulting feature and label arrays.
 */
@Test
public void testRecordReaderDataSetIteratorConcat() {

    // Record layout: [Double(1), NDArray(2,3,4), Double(5), NDArray(6,7,8), Int(9), Int(1)].
    // The test expects the first five writables to be flattened into a [1,9] feature row
    // and the writable at index 5 (value 1) to become a one-hot label over 3 classes.
    final List<Writable> record = Arrays.<Writable>asList(
                    new DoubleWritable(1),
                    new NDArrayWritable(Nd4j.create(new double[] {2, 3, 4})),
                    new DoubleWritable(5),
                    new NDArrayWritable(Nd4j.create(new double[] {6, 7, 8})),
                    new IntWritable(9),
                    new IntWritable(1));

    final RecordReader reader = new CollectionRecordReader(Collections.singletonList(record));

    // Arguments: batch size 1, label index 5, 3 possible labels.
    final DataSetIterator dataSets = new RecordReaderDataSetIterator(reader, 1, 5, 3);
    final DataSet batch = dataSets.next();

    final INDArray expectedFeatures = Nd4j.create(new float[] {1, 2, 3, 4, 5, 6, 7, 8, 9}, new int[]{1,9});
    final INDArray expectedLabels = Nd4j.create(new float[] {0, 1, 0}, new int[]{1,3});

    assertEquals(expectedFeatures, batch.getFeatures());
    assertEquals(expectedLabels, batch.getLabels());
}
 
Example #4
Source File: RecordReaderMultiDataSetIteratorTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Iterates "iris.txt" in minibatches of 10 with metadata collection enabled and checks
 * that every batch can be rebuilt from its own metadata. The final assertion expects
 * 150 / 10 batches in total.
 */
@Test
public void testsBasicMeta() throws Exception {
    final RecordReader csvReader = new CSVRecordReader(0, ',');
    csvReader.initialize(new FileSplit(Resources.asFile("iris.txt")));

    // Columns 0..3 are inputs; column 4 is one-hot encoded over 3 classes.
    final RecordReaderMultiDataSetIterator.Builder builder = new RecordReaderMultiDataSetIterator.Builder(10);
    final RecordReaderMultiDataSetIterator iterator = builder
                    .addReader("reader", csvReader)
                    .addInput("reader", 0, 3)
                    .addOutputOneHot("reader", 4, 3)
                    .build();

    iterator.setCollectMetaData(true);

    int batches = 0;
    while (iterator.hasNext()) {
        final MultiDataSet original = iterator.next();
        // Reload the same examples purely from the metadata attached to the batch.
        final MultiDataSet reloaded = iterator.loadFromMetaData(original.getExampleMetaData(RecordMetaData.class));
        assertEquals(original, reloaded);
        batches++;
    }
    assertEquals(150 / 10, batches);
}
 
Example #5
Source File: StringToDataSetExportFunction.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the buffered records into a single {@link DataSet} and writes it to
 * {@code outputDir} as {@code dataset_<uid>_<n>.bin}, then clears the buffer.
 * Nothing happens when the buffer is empty, or when it holds fewer than
 * {@code batchSize} records and this is not the final record.
 *
 * @param list        buffered records; cleared after a successful export
 * @param finalRecord true to force an export of a partially filled buffer
 * @throws Exception if building the URI or writing the file fails
 */
private void processBatchIfRequired(List<List<Writable>> list, boolean finalRecord) throws Exception {
    final boolean incompleteBatch = !finalRecord && list.size() < batchSize;
    if (list.isEmpty() || incompleteBatch) {
        return;
    }

    final RecordReader reader = new CollectionRecordReader(list);
    final RecordReaderDataSetIterator dataSets = new RecordReaderDataSetIterator(reader, null, batchSize,
                    labelIndex, labelIndex, numPossibleLabels, -1, regression);
    final DataSet batch = dataSets.next();

    // outputCount is post-incremented so that every export gets a distinct file name.
    final String filename = "dataset_" + uid + "_" + (outputCount++) + ".bin";
    final URI target = new URI(outputDir.getPath() + "/" + filename);

    // Fall back to the default Hadoop configuration when none was supplied.
    final Configuration hadoopConf;
    if (conf == null) {
        hadoopConf = DefaultHadoopConfig.get();
    } else {
        hadoopConf = conf.getValue().getConfiguration();
    }

    final FileSystem fileSystem = FileSystem.get(target, hadoopConf);
    try (FSDataOutputStream stream = fileSystem.create(new Path(target))) {
        batch.save(stream);
    }

    list.clear();
}
 
Example #6
Source File: ConvolutionLayerSetupTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the configuration returned by {@code incompleteLRN()} with a 28x28x3
 * convolutional input type and checks that the layer at index 3 ends up with nIn == 6.
 */
@Test
public void testLRN() throws Exception {
    final List<String> classNames = new ArrayList<>(Arrays.asList("Zico", "Ziwang_Xu"));

    // Copy the test images from the classpath into a temporary folder.
    final File imageDir = testDir.newFolder();
    new ClassPathResource("lfwtest/").copyDirectory(imageDir);

    final RecordReader imageReader = new ImageRecordReader(28, 28, 3);
    imageReader.initialize(new FileSplit(new File(imageDir.getAbsolutePath())));
    // The iterator is constructed but not consumed by this test.
    DataSetIterator dataIter = new RecordReaderDataSetIterator(imageReader, 10, 1, classNames.size());
    // "lfwtest" is not among the names added above, so this call leaves the list unchanged.
    classNames.remove("lfwtest");

    final NeuralNetConfiguration.ListBuilder listBuilder = (NeuralNetConfiguration.ListBuilder) incompleteLRN();
    listBuilder.setInputType(InputType.convolutional(28, 28, 3));
    final MultiLayerConfiguration built = listBuilder.build();

    final ConvolutionLayer convLayer = (ConvolutionLayer) built.getConf(3).getLayer();
    assertEquals(6, convLayer.getNIn());
}
 
Example #7
Source File: MultipleEpochsIteratorTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps an iris {@link RecordReaderDataSetIterator} (batch size 150) in a
 * {@link MultipleEpochsIterator}, drains it, and checks that every batch is non-null
 * and that the wrapper's {@code epochs} field equals the requested epoch count.
 */
@Test
public void testNextAndReset() throws Exception {
    final int epochs = 3;

    final RecordReader csvReader = new CSVRecordReader();
    csvReader.initialize(new FileSplit(Resources.asFile("iris.txt")));
    final DataSetIterator singleEpoch = new RecordReaderDataSetIterator(csvReader, 150);
    final MultipleEpochsIterator multiIter = new MultipleEpochsIterator(epochs, singleEpoch);

    assertTrue(multiIter.hasNext());
    while (multiIter.hasNext()) {
        final DataSet batch = multiIter.next();
        assertFalse(batch == null);
    }
    assertEquals(epochs, multiIter.epochs);
}
 
Example #8
Source File: ArrowConverterTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips a record batch through {@link ArrowConverter} in memory, then reads the same
 * records from a temporary file with {@link ArrowRecordReader} and checks that the first
 * record has two values.
 */
@Test
public void testRecordReaderAndWriteFile() throws Exception {
    val recordsToWrite = recordToWrite();

    // In-memory round trip: write the batch to a byte buffer and read it back.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    ArrowConverter.writeRecordBatchTo(recordsToWrite.getRight(), recordsToWrite.getFirst(), buffer);
    byte[] serialized = buffer.toByteArray();
    val roundTripped = ArrowConverter.readFromBytes(serialized);
    assertEquals(recordsToWrite, roundTripped);

    // File-based path: read the records back through the record reader.
    File dataFile = tmpDataFile(recordsToWrite);
    RecordReader arrowReader = new ArrowRecordReader();
    arrowReader.initialize(new FileSplit(dataFile));

    List<Writable> firstRecord = arrowReader.next();
    assertEquals(2, firstRecord.size());
}
 
Example #9
Source File: JacksonRecordReaderTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Shared assertions for Jackson-based readers: expects exactly three records with the
 * values below (including the "MISSING_B" / "MISSING_CX" placeholders), and the same
 * three records again after {@code reset()}.
 *
 * @param rr an already initialized reader positioned at its first record
 */
private static void testJacksonRecordReader(RecordReader rr) {
    final List<Writable> expectedFirst =
                    Arrays.<Writable>asList(new Text("aValue0"), new Text("bValue0"), new Text("cxValue0"));
    final List<Writable> expectedSecond =
                    Arrays.<Writable>asList(new Text("aValue1"), new Text("MISSING_B"), new Text("cxValue1"));
    final List<Writable> expectedThird =
                    Arrays.<Writable>asList(new Text("aValue2"), new Text("bValue2"), new Text("MISSING_CX"));

    // First pass.
    final List<Writable> actualFirst = rr.next();
    assertEquals(expectedFirst, actualFirst);

    final List<Writable> actualSecond = rr.next();
    assertEquals(expectedSecond, actualSecond);

    final List<Writable> actualThird = rr.next();
    assertEquals(expectedThird, actualThird);

    assertFalse(rr.hasNext());

    // Second pass after reset() must reproduce the same sequence.
    rr.reset();
    assertEquals(expectedFirst, rr.next());
    assertEquals(expectedSecond, rr.next());
    assertEquals(expectedThird, rr.next());
    assertFalse(rr.hasNext());
}
 
Example #10
Source File: JacksonRecordReaderTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the shared Jackson reader assertions against YAML input: files
 * {@code yaml_test_0.txt} .. {@code yaml_test_2.txt} copied from the classpath into a
 * temporary folder.
 */
@Test
public void testReadingYaml() throws Exception {
    // Copy the YAML fixtures into a fresh temporary directory.
    final File yamlDir = testDir.newFolder();
    new ClassPathResource("datavec-api/yaml/").copyDirectory(yamlDir);

    // "%d" is the file index placeholder, filled with 0..2 by the split.
    final String pathPattern = new File(yamlDir, "yaml_test_%d.txt").getAbsolutePath();
    final InputSplit split = new NumberedFileInputSplit(pathPattern, 0, 2);

    final ObjectMapper yamlMapper = new ObjectMapper(new YAMLFactory());
    final RecordReader reader = new JacksonRecordReader(getFieldSelection(), yamlMapper);
    reader.initialize(split);

    testJacksonRecordReader(reader);
}
 
Example #11
Source File: TfidfRecordReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Reads one record from the "labeled" classpath resource with {@link TfidfRecordReader},
 * reloads it via its metadata, and checks that both copies have two entries and are equal.
 */
@Test
public void testReadRecordFromMetaData() throws Exception {
    TfidfVectorizer vectorizer = new TfidfVectorizer();
    Configuration conf = new Configuration();
    conf.setInt(TfidfVectorizer.MIN_WORD_FREQUENCY, 1);
    conf.setBoolean(RecordReader.APPEND_LABEL, true);
    vectorizer.initialize(conf);
    TfidfRecordReader reader = new TfidfRecordReader();
    reader.initialize(conf, new FileSplit(new ClassPathResource("labeled").getFile()));

    Record record = reader.nextRecord();

    // Load the same record a second time using only its metadata.
    Record reread = reader.loadFromMetaData(record.getMetaData());

    // assertEquals takes (expected, actual); keeping that order makes failure
    // messages report the right value as "expected".
    assertEquals(2, record.getRecord().size());
    assertEquals(2, reread.getRecord().size());
    assertEquals(record.getRecord().get(0), reread.getRecord().get(0));
    assertEquals(record.getRecord().get(1), reread.getRecord().get(1));
    assertEquals(record.getMetaData(), reread.getMetaData());
}
 
Example #12
Source File: DiabetesFileDataSource.java    From FederatedAndroidTrainer with MIT License 6 votes vote down vote up
/**
 * Loads the CSV data from {@code dataFile}, splits it 80/20 into {@code trainingData} and
 * {@code testData}, and standardizes both using statistics fitted on the training part only.
 *
 * @throws IOException          if the record reader cannot be initialized
 * @throws InterruptedException if initialization is interrupted
 */
private void createDataSource() throws IOException, InterruptedException {
    // Step 1: parse the CSV input; no header lines are skipped, fields are comma separated.
    final int skipLines = 0;
    final String fieldDelimiter = ",";
    final RecordReader csvReader = new CSVRecordReader(skipLines, fieldDelimiter);
    csvReader.initialize(new InputStreamInputSplit(dataFile));

    // Step 2: convert records to a DataSet. Column 11 is both the first and the last label
    // column. NOTE(review): the trailing 'true' presumably selects regression mode - confirm.
    final int labelIndex = 11;
    final DataSetIterator dataSets =
            new RecordReaderDataSetIterator(csvReader, batchSize, labelIndex, labelIndex, true);
    final DataSet everything = dataSets.next();

    // Step 3: 80% of the examples go to training, the rest to testing.
    final SplitTestAndTrain split = everything.splitTestAndTrain(0.80);
    trainingData = split.getTrain();
    testData = split.getTest();

    // Step 4: normalize with NormalizerStandardize. The statistics are fitted on the
    // training set only and then applied to both sets.
    final DataNormalization standardizer = new NormalizerStandardize();
    standardizer.fit(trainingData);
    standardizer.transform(trainingData);
    standardizer.transform(testData);
}
 
Example #13
Source File: ArrowConverterTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a record batch with {@link ArrowConverter}, verifies that reading the bytes
 * back gives an equal value, and then verifies that {@link ArrowRecordReader} returns a
 * two-element first record from a temporary file holding the same data.
 */
@Test
public void testRecordReaderAndWriteFile() throws Exception {
    val recordsToWrite = recordToWrite();

    // Step 1: byte-array round trip.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    ArrowConverter.writeRecordBatchTo(recordsToWrite.getRight(), recordsToWrite.getFirst(), out);
    val decoded = ArrowConverter.readFromBytes(out.toByteArray());
    assertEquals(recordsToWrite, decoded);

    // Step 2: read the records from a file on disk.
    File arrowFile = tmpDataFile(recordsToWrite);
    RecordReader fileReader = new ArrowRecordReader();
    fileReader.initialize(new FileSplit(arrowFile));

    List<Writable> first = fileReader.next();
    assertEquals(2, first.size());
}
 
Example #14
Source File: TfidfRecordReaderTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Reads one record from a temporary copy of "datavec-data-nlp/labeled/" with
 * {@link TfidfRecordReader}, reloads it via its metadata, and checks that both copies
 * have two entries and are equal.
 */
@Test
public void testReadRecordFromMetaData() throws Exception {
    TfidfVectorizer vectorizer = new TfidfVectorizer();
    Configuration conf = new Configuration();
    conf.setInt(TfidfVectorizer.MIN_WORD_FREQUENCY, 1);
    conf.setBoolean(RecordReader.APPEND_LABEL, true);
    vectorizer.initialize(conf);
    TfidfRecordReader reader = new TfidfRecordReader();
    // Work on a copy of the labeled corpus inside a temporary folder.
    File f = testDir.newFolder();
    new ClassPathResource("datavec-data-nlp/labeled/").copyDirectory(f);
    reader.initialize(conf, new FileSplit(f));

    Record record = reader.nextRecord();

    // Load the same record a second time using only its metadata.
    Record reread = reader.loadFromMetaData(record.getMetaData());

    // assertEquals takes (expected, actual); keeping that order makes failure
    // messages report the right value as "expected".
    assertEquals(2, record.getRecord().size());
    assertEquals(2, reread.getRecord().size());
    assertEquals(record.getRecord().get(0), reread.getRecord().get(0));
    assertEquals(record.getRecord().get(1), reread.getRecord().get(1));
    assertEquals(record.getMetaData(), reread.getMetaData());
}
 
Example #15
Source File: VasttextTextVectorizer.java    From scava with Eclipse Public License 2.0 6 votes vote down vote up
/**
 * Consumes every record of {@code reader}: tokenizes the first column, counts the
 * document, optionally processes n-grams, and finally notifies the callback.
 *
 * @param reader   source of records; read until {@code hasNext()} is false
 * @param callBack invoked once per record after processing; may be null
 */
@Override
public void fit(RecordReader reader, RecordCallBack callBack) {
    while (reader.hasNext()) {
        final Record current = reader.nextRecord();
        final String text = current.getRecord().get(0).toString();
        final Tokenizer tokenizer = tokenizerFactory.create(text);
        cache.incrementNumDocs(1);

        // With stop words configured, the resulting tokens may differ from the raw tokenizer output.
        final List<String> tokens = (stopWords == null)
                ? doWithTokens(tokenizer)
                : doWithTokensStopWords(tokenizer);

        if (maxNgrams > 1) {
            doWithNgram(ngramsGenerator(tokens));
        }
        if (callBack != null) {
            callBack.onRecord(current);
        }
    }
}
 
Example #16
Source File: ListStringInputFormat.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link ListStringRecordReader} and initializes it with the given split.
 *
 * @param split the input split the new reader is initialized with
 * @return the initialized reader
 * @throws IOException          if initialization fails
 * @throws InterruptedException if initialization is interrupted
 */
@Override
public RecordReader createReader(InputSplit split) throws IOException, InterruptedException {
    final RecordReader listReader = new ListStringRecordReader();
    listReader.initialize(split);
    return listReader;
}
 
Example #17
Source File: TrainUtil.java    From FancyBing with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates an asynchronous multi-dataset iterator over the records found at {@code dataPath}.
 * Columns {@code 0 .. labelIndex - 1} are the input, column {@code labelIndex} is a one-hot
 * classification output, and column {@code labelIndex + 1} is a second, raw-valued output.
 *
 * @param dataPath   path handed to the {@link FileSplit}
 * @param batchSize  minibatch size passed to the iterator builder
 * @param labelIndex column index of the classification label
 * @param outputNum  number of classes for the one-hot output
 * @return the iterator wrapped in an {@link AsyncMultiDataSetIterator} with queue size 1
 * @throws Exception if the reader cannot be initialized
 */
public static MultiDataSetIterator loadDataSetIter(String dataPath, int batchSize, int labelIndex, int outputNum) throws Exception {
    final RecordReader featureReader = new FeatureRecordReader();
    featureReader.initialize(new FileSplit(new File(dataPath)));

    final RecordReaderMultiDataSetIterator.Builder builder = new RecordReaderMultiDataSetIterator.Builder(batchSize);
    builder.addReader("reader", featureReader);
    // Input: every column before the label.
    builder.addInput("reader", 0, labelIndex - 1);
    // Output 1: one-hot class label.
    builder.addOutputOneHot("reader", labelIndex, outputNum);
    // Output 2: value regression target. A further output at labelIndex + 2
    // (win/lost regression) is intentionally not wired in.
    builder.addOutput("reader", labelIndex + 1, labelIndex + 1);
    final MultiDataSetIterator synchronous = builder.build();

    return new AsyncMultiDataSetIterator(synchronous, 1);
}
 
Example #18
Source File: LineReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Writes three 3-line text files into a temporary directory and checks that
 * {@link LineRecordReader} returns nine single-value records for that directory.
 */
@Test
public void testLineReader() throws Exception {
    String tempDir = System.getProperty("java.io.tmpdir");
    File tmpdir = new File(tempDir, "tmpdir-testLineReader");
    // A directory left behind by an earlier failed run is normally non-empty, and
    // File.delete() silently fails on non-empty directories. Delete recursively and
    // fail loudly if the directory cannot be (re)created, so stale files can never
    // distort the record count.
    if (tmpdir.exists())
        FileUtils.forceDelete(tmpdir);
    FileUtils.forceMkdir(tmpdir);

    try {
        File tmp1 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp1.txt"));
        File tmp2 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp2.txt"));
        File tmp3 = new File(FilenameUtils.concat(tmpdir.getPath(), "tmp3.txt"));

        FileUtils.writeLines(tmp1, Arrays.asList("1", "2", "3"));
        FileUtils.writeLines(tmp2, Arrays.asList("4", "5", "6"));
        FileUtils.writeLines(tmp3, Arrays.asList("7", "8", "9"));

        InputSplit split = new FileSplit(tmpdir);

        RecordReader reader = new LineRecordReader();
        reader.initialize(split);

        int count = 0;
        List<List<Writable>> list = new ArrayList<>();
        while (reader.hasNext()) {
            List<Writable> l = reader.next();
            // Every line must come back as a record with exactly one value.
            assertEquals(1, l.size());
            list.add(l);
            count++;
        }

        assertEquals(9, count);
    } finally {
        // Best-effort cleanup that also runs when an assertion above fails.
        FileUtils.deleteQuietly(tmpdir);
    }
}
 
Example #19
Source File: ComposableRecordReader.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Returns one combined record made of the next record of every underlying reader, in
 * iteration order of {@code readers}. When {@code hasNext()} is false the combined record
 * is empty. Listeners are invoked with the result in either case.
 *
 * @return the concatenated values, possibly empty
 */
@Override
public List<Writable> next() {
    final List<Writable> combined = new ArrayList<>();
    if (hasNext()) {
        for (RecordReader delegate : readers) {
            combined.addAll(delegate.next());
        }
    }
    invokeListeners(combined);
    return combined;
}
 
Example #20
Source File: DataSetIteratorHelper.java    From Java-Deep-Learning-Cookbook with MIT License 5 votes vote down vote up
/**
 * Wraps {@code recordReader} in a {@link TransformProcessRecordReader} that drops the
 * "RowNumber", "CustomerId" and "Surname" columns, maps "Gender" to an integer,
 * one-hot encodes "Geography" and then removes the "Geography[France]" column.
 *
 * @param recordReader the reader whose records are transformed
 * @param schema       schema the transform process is built from
 * @return a reader applying the transform process to {@code recordReader}
 */
private static RecordReader applyTransform(RecordReader recordReader, Schema schema){
    final TransformProcess.Builder steps = new TransformProcess.Builder(schema);
    steps.removeColumns("RowNumber","CustomerId","Surname");
    steps.categoricalToInteger("Gender");
    steps.categoricalToOneHot("Geography");
    // Remove one of the one-hot columns produced by the previous step.
    steps.removeColumns("Geography[France]");
    final TransformProcess process = steps.build();

    return new TransformProcessRecordReader(recordReader, process);
}
 
Example #21
Source File: VasttextDataIterator.java    From scava with Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Pulls up to {@code num} records from every registered record reader and turns them into
 * one {@link MultiDataSet}. When {@code collectMetaData} is set, the metadata of the i-th
 * record of each reader is merged into the i-th composable metadata map.
 *
 * @param num maximum number of records to read per reader
 * @return the dataset built from the records that were read
 * @throws NoSuchElementException if {@code hasNext()} is false
 */
@Override
public MultiDataSet next(int num) {
	if (!hasNext()) {
		throw new NoSuchElementException("No next elements");
	}

	// Values read per reader name.
	final Map<String, List<List<Writable>>> valuesByReader = new HashMap<>();
	// Stays null when metadata collection is disabled.
	final List<RecordMetaDataComposableMap> metaByExample;
	if (collectMetaData) {
		metaByExample = new ArrayList<RecordMetaDataComposableMap>();
	} else {
		metaByExample = null;
	}

	for (Map.Entry<String, RecordReader> readerEntry : recordReaders.entrySet()) {
		final String readerName = readerEntry.getKey();
		final RecordReader source = readerEntry.getValue();

		// Cap the initial capacity in case the batch size is far larger than the data.
		final List<List<Writable>> batch = new ArrayList<>(Math.min(num, 100000));
		int index = 0;
		while (index < num && source.hasNext()) {
			final List<Writable> values;
			if (!collectMetaData) {
				values = source.next();
			} else {
				final Record full = source.nextRecord();
				values = full.getRecord();
				// Create the metadata map for this example index on first use.
				if (metaByExample.size() <= index) {
					metaByExample.add(new RecordMetaDataComposableMap(new HashMap<String, RecordMetaData>()));
				}
				metaByExample.get(index).getMeta().put(readerName, full.getMetaData());
			}
			batch.add(values);
			index++;
		}

		valuesByReader.put(readerName, batch);
	}

	return nextMultiDataSet(valuesByReader, metaByExample);
}
 
Example #22
Source File: RecordReaderConverter.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Copies every remaining record from {@code reader} to {@code writer}.
 * The writer is closed afterwards only when {@code closeOnCompletion} is true and the
 * copy finished without an exception.
 *
 * @param reader            source of records; must have at least one record left
 * @param writer            destination the records are written to
 * @param closeOnCompletion if true, {@link RecordWriter#close()} is called after the last record
 * @throws IOException                   if the underlying reader/writer throws an exception
 * @throws UnsupportedOperationException if {@code reader} has no next element on entry
 */
public static void convert(RecordReader reader, RecordWriter writer, boolean closeOnCompletion) throws IOException {
    // An exhausted reader is rejected rather than silently producing no output.
    final boolean hasData = reader.hasNext();
    if (!hasData) {
        throw new UnsupportedOperationException("Cannot convert RecordReader: reader has no next element");
    }

    while (reader.hasNext()) {
        writer.write(reader.next());
    }

    if (!closeOnCompletion) {
        return;
    }
    writer.close();
}
 
Example #23
Source File: JacksonRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the shared Jackson reader assertions against YAML input: files
 * {@code yaml_test_0.txt} .. {@code yaml_test_2.txt} located next to the
 * "yaml/yaml_test_0.txt" classpath resource.
 */
@Test
public void testReadingYaml() throws Exception {
    //Exact same information as JSON format, but in YAML format

    ClassPathResource cpr = new ClassPathResource("yaml/yaml_test_0.txt");
    String absolutePath = cpr.getFile().getAbsolutePath();
    // Turn only the file index into the "%d" placeholder. A blanket replace("0", "%d")
    // would also rewrite any '0' in parent directory names (e.g. "project-1.0.0") and
    // make the numbered split point at files that do not exist. The last '0' of the
    // path is the index in "yaml_test_0.txt".
    int indexPos = absolutePath.lastIndexOf('0');
    String path = absolutePath.substring(0, indexPos) + "%d" + absolutePath.substring(indexPos + 1);

    InputSplit is = new NumberedFileInputSplit(path, 0, 2);

    RecordReader rr = new JacksonRecordReader(getFieldSelection(), new ObjectMapper(new YAMLFactory()));
    rr.initialize(is);

    testJacksonRecordReader(rr);
}
 
Example #24
Source File: TestSerialization.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes one instance of each listed record reader with Java serialization and
 * deserializes it again. The test passes when neither step throws and the result can be
 * cast back to {@link RecordReader}.
 */
@Test
public void testRR() throws Exception {

    List<RecordReader> rrs = new ArrayList<>();

    rrs.add(new CSVNLinesSequenceRecordReader(10));
    rrs.add(new CSVRecordReader(10, ','));
    rrs.add(new CSVSequenceRecordReader(1, ","));
    rrs.add(new CSVVariableSlidingWindowRecordReader(5));
    rrs.add(new CSVRegexRecordReader(0, ",", null, new String[] {null, "(.+) (.+) (.+)"}));
    rrs.add(new JacksonRecordReader(new FieldSelection.Builder().addField("a").addField(new Text("MISSING_B"), "b")
            .addField(new Text("MISSING_CX"), "c", "x").build(), new ObjectMapper(new JsonFactory())));
    rrs.add(new JacksonLineRecordReader(new FieldSelection.Builder().addField("value1")
            .addField("value2").build(), new ObjectMapper(new JsonFactory())));
    rrs.add(new LibSvmRecordReader());
    rrs.add(new SVMLightRecordReader());
    rrs.add(new RegexLineRecordReader("(.+) (.+) (.+)", 0));
    rrs.add(new RegexSequenceRecordReader("(.+) (.+) (.+)", 0));
    rrs.add(new TransformProcessRecordReader(new CSVRecordReader(), getTp()));
    rrs.add(new TransformProcessSequenceRecordReader(new CSVSequenceRecordReader(), getTp()));
    rrs.add(new LineRecordReader());

    for (RecordReader r : rrs) {
        System.out.println(r.getClass().getName());
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // Close the object stream before taking the bytes so that everything it has
        // buffered is guaranteed to be in the byte array, and so the stream is released
        // even when writeObject throws.
        try (ObjectOutputStream os = new ObjectOutputStream(baos)) {
            os.writeObject(r);
        }
        byte[] bytes = baos.toByteArray();

        try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
            // The cast doubles as the check that a RecordReader came back.
            RecordReader r2 = (RecordReader) ois.readObject();
        }
    }
}
 
Example #25
Source File: JacksonLineRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the shared Jackson reader assertions against "json/json_test_3.txt", read through
 * {@link JacksonLineRecordReader} with a JSON object mapper.
 */
@Test
public void testReadJSON() throws Exception {
    final ObjectMapper jsonMapper = new ObjectMapper(new JsonFactory());
    final RecordReader lineReader = new JacksonLineRecordReader(getFieldSelection(), jsonMapper);
    lineReader.initialize(new FileSplit(new ClassPathResource("json/json_test_3.txt").getFile()));

    testJacksonRecordReader(lineReader);
}
 
Example #26
Source File: TextVectorizer.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Consumes every record of {@code reader}: converts it to a string, tokenizes it,
 * processes the tokens, notifies the callback and then counts the document.
 *
 * @param reader   source of records; read until {@code hasNext()} is false
 * @param callBack invoked once per record after its tokens were processed; may be null
 */
@Override
public void fit(RecordReader reader, RecordCallBack callBack) {
    while (reader.hasNext()) {
        final Record current = reader.nextRecord();
        final String text = toString(current.getRecord());
        doWithTokens(tokenizerFactory.create(text));
        if (callBack != null) {
            callBack.onRecord(current);
        }
        // The document count is bumped only after the callback has run.
        cache.incrementNumDocs(1);
    }
}
 
Example #27
Source File: ModelGenerator.java    From arabic-characters-recognition with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the CSV file at the given path and exposes it as a {@link DataSetIterator}.
 *
 * @param csvFileClasspath path of the CSV file, passed to {@code new File(...)}
 * @param BATCH_SIZE       minibatch size
 * @param LABEL_INDEX      column index of the label
 * @param numClasses       number of possible label values
 * @return an iterator over the parsed file
 * @throws IOException          if the reader cannot be initialized
 * @throws InterruptedException if initialization is interrupted
 */
private static DataSetIterator readCSVDataset(String csvFileClasspath, int BATCH_SIZE, int LABEL_INDEX, int numClasses)
        throws IOException, InterruptedException {

    final File csvFile = new File(csvFileClasspath);
    final RecordReader csvReader = new CSVRecordReader();
    csvReader.initialize(new FileSplit(csvFile));

    return new RecordReaderDataSetIterator(csvReader, BATCH_SIZE, LABEL_INDEX, numClasses);
}
 
Example #28
Source File: RecordReaderDataSetIterator.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Main constructor; stores every argument in the corresponding field without validation.
 *
 * @param recordReader      the record reader supplying the data
 * @param converter         writable converter; may be null
 * @param batchSize         minibatch size, i.e. number of examples returned per call of .next()
 * @param labelIndexFrom    index of the label (classification), or index of the first label
 *                          column (multi-output regression)
 * @param labelIndexTo      index of the last label column, <i>inclusive</i>; only used if
 *                          regression == true
 * @param numPossibleLabels number of possible labels for classification; not used if
 *                          regression == true
 * @param maxNumBatches     maximum number of batches to return
 * @param regression        true for regression, false for classification (labelIndexFrom is
 *                          then taken as the class index column)
 */
public RecordReaderDataSetIterator(RecordReader recordReader, WritableConverter converter, int batchSize,
                                   int labelIndexFrom, int labelIndexTo, int numPossibleLabels, int maxNumBatches,
                                   boolean regression) {
    // Data source and conversion.
    this.recordReader = recordReader;
    this.converter = converter;

    // Batching limits.
    this.batchSize = batchSize;
    this.maxNumBatches = maxNumBatches;

    // Label layout.
    this.regression = regression;
    this.numPossibleLabels = numPossibleLabels;
    this.labelIndex = labelIndexFrom;
    this.labelIndexTo = labelIndexTo;
}
 
Example #29
Source File: ListStringInputFormat.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new {@link ListStringRecordReader}, initializes it with {@code split} and
 * returns it.
 *
 * @param split the split to read
 * @return the freshly initialized reader for the given split
 * @throws IOException          if initialization fails
 * @throws InterruptedException if initialization is interrupted
 */
@Override
public RecordReader createReader(InputSplit split) throws IOException, InterruptedException {
    final RecordReader initialized = new ListStringRecordReader();
    initialized.initialize(split);
    return initialized;
}
 
Example #30
Source File: LineInputFormat.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new {@link LineRecordReader}, initializes it with {@code split} and returns it.
 *
 * @param split the split to read
 * @return the freshly initialized line reader
 * @throws IOException          if initialization fails
 * @throws InterruptedException if initialization is interrupted
 */
@Override
public RecordReader createReader(InputSplit split) throws IOException, InterruptedException {
    final LineRecordReader lineReader = new LineRecordReader();
    lineReader.initialize(split);
    return lineReader;
}