org.datavec.api.records.Record Java Examples

The following examples show how to use org.datavec.api.records.Record. They are drawn from several open-source projects; the source file, project, and license are noted above each example.
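
Before the project examples, a minimal sketch of the Record contract itself may help: a Record pairs a List<Writable> with optional RecordMetaData describing where the data came from. The values below are purely illustrative.

import java.util.Arrays;
import java.util.List;

import org.datavec.api.records.Record;
import org.datavec.api.writable.DoubleWritable;
import org.datavec.api.writable.Text;
import org.datavec.api.writable.Writable;

public class RecordBasics {
    public static void main(String[] args) {
        List<Writable> values = Arrays.asList(new Text("Bolivian Dark"), new DoubleWritable(8.95));

        // The impl class pairs writables with (optional) metadata; null metadata is allowed
        Record record = new org.datavec.api.records.impl.Record(values, null);

        System.out.println(record.getRecord());   // the writables
        System.out.println(record.getMetaData()); // null here; record readers fill this in
    }
}
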
Example #1
Source File: TfidfRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testRecordMetaData() throws Exception {
    TfidfVectorizer vectorizer = new TfidfVectorizer();
    Configuration conf = new Configuration();
    conf.setInt(TfidfVectorizer.MIN_WORD_FREQUENCY, 1);
    conf.setBoolean(RecordReader.APPEND_LABEL, true);
    vectorizer.initialize(conf);
    TfidfRecordReader reader = new TfidfRecordReader();
    File f = testDir.newFolder();
    new ClassPathResource("datavec-data-nlp/labeled/").copyDirectory(f);
    reader.initialize(conf, new FileSplit(f));

    while (reader.hasNext()) {
        Record record = reader.nextRecord();
        assertNotNull(record.getMetaData().getURI());
        assertEquals(record.getMetaData().getReaderClass(), TfidfRecordReader.class);
    }
}
 
Example #2
Source File: InferenceExecutionerStepRunner.java    From konduit-serving with Apache License 2.0
private Record[] toNDArray(Record[] records) {
    if (records[0].getRecord().size() > 1 && !recordIsAllNumeric(records[0])) {
        throw new IllegalArgumentException("Invalid record type passed in. This pipeline only accepts records"
                + " containing a single NDArrayWritable per input name, or purely numeric values that can be"
                + " converted to a matrix");
    } else if (allNdArray(records)) {
        return records;
    } else {
        INDArray arr = Nd4j.create(records.length, records[0].getRecord().size());
        for (int i = 0; i < arr.rows(); i++) {
            for (int j = 0; j < arr.columns(); j++) {
                arr.putScalar(i, j, records[i].getRecord().get(j).toDouble());
            }
        }

        return new Record[]{
                new org.datavec.api.records.impl.Record(
                        Collections.singletonList(new NDArrayWritable(arr))
                        , null
                )};
    }
}
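
For reference, the matrix-building branch above can be reproduced in isolation with just the public Record API and Nd4j; a minimal sketch with made-up values:

Record[] records = new Record[] {
        new org.datavec.api.records.impl.Record(
                Arrays.<Writable>asList(new DoubleWritable(1.0), new DoubleWritable(2.0)), null),
        new org.datavec.api.records.impl.Record(
                Arrays.<Writable>asList(new DoubleWritable(3.0), new DoubleWritable(4.0)), null)
};

// One row per record, one column per writable
INDArray matrix = Nd4j.create(records.length, records[0].getRecord().size());
for (int i = 0; i < records.length; i++) {
    for (int j = 0; j < records[i].getRecord().size(); j++) {
        matrix.putScalar(i, j, records[i].getRecord().get(j).toDouble());
    }
}
// matrix is now [[1.0, 2.0], [3.0, 4.0]]
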
 
Example #3
Source File: VasttextTextVectorizer.java    From scava with Eclipse Public License 2.0
@Override
public void fit(RecordReader reader, RecordCallBack callBack) {
    while (reader.hasNext()) {
        Record record = reader.nextRecord();
        String s = record.getRecord().get(0).toString();
        Tokenizer tokenizer = tokenizerFactory.create(s);
        cache.incrementNumDocs(1);
        //These tokens might differ from the tokenizer's output when stop words are used
        List<String> tokens;
        if (stopWords == null)
            tokens = doWithTokens(tokenizer);
        else
            tokens = doWithTokensStopWords(tokenizer);
        if (maxNgrams > 1)
            doWithNgram(ngramsGenerator(tokens));
        if (callBack != null)
            callBack.onRecord(record);
    }
}
 
Example #4
Source File: TfidfRecordReader.java    From DataVec with Apache License 2.0
@Override
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    List<Record> out = new ArrayList<>();

    for (Record fileContents : super.loadFromMetaData(recordMetaDatas)) {
        INDArray transform = tfidfVectorizer.transform(fileContents);

        org.datavec.api.records.impl.Record record = new org.datavec.api.records.impl.Record(
                        new ArrayList<>(Collections.<Writable>singletonList(new NDArrayWritable(transform))),
                        new RecordMetaDataURI(fileContents.getMetaData().getURI(), TfidfRecordReader.class));

        if (appendLabel)
            record.getRecord().add(fileContents.getRecord().get(fileContents.getRecord().size() - 1));
        out.add(record);
    }

    return out;
}
 
Example #5
Source File: TransformProcessRecordReader.java    From DataVec with Apache License 2.0
/**
 * Whether there are any more records
 *
 * @return true if another record is available, false otherwise
 */
@Override
public boolean hasNext() {
    if(next != null){
        return true;
    }
    if(!recordReader.hasNext()){
        return false;
    }

    //Prefetch, until we find one that isn't filtered out - or we run out of data
    while(next == null && recordReader.hasNext()){
        Record r = recordReader.nextRecord();
        List<Writable> temp = transformProcess.execute(r.getRecord());
        if(temp == null){
            continue;
        }
        next = new org.datavec.api.records.impl.Record(temp, r.getMetaData());
    }

    return next != null;
}
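
As a usage sketch, a TransformProcessRecordReader is built by wrapping an underlying reader together with a TransformProcess; when the process filters a record out (execute returns null), the prefetch loop above simply skips to the next one. The schema, filter, and file path below are assumptions for illustration:

Schema schema = new Schema.Builder()
        .addColumnString("text")
        .build();

// Drop records whose "text" column is empty; hasNext() will skip past them
TransformProcess tp = new TransformProcess.Builder(schema)
        .filter(new ConditionFilter(new StringColumnCondition("text", ConditionOp.Equal, "")))
        .build();

RecordReader csv = new CSVRecordReader();
csv.initialize(new FileSplit(new File("/path/to/data.csv"))); // hypothetical path

RecordReader reader = new TransformProcessRecordReader(csv, tp);
while (reader.hasNext()) {
    Record r = reader.nextRecord(); // already prefetched by hasNext()
    System.out.println(r.getRecord());
}
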
 
Example #6
Source File: VasttextExtraFileReader.java    From scava with Eclipse Public License 2.0
public Record processNextRecord() {
    //We need to split and find the label(s)
    String[] line = super.next().get(0).toString().split(" ");
    if (line.length != numericFeaturesSize)
        throw new UnsupportedOperationException("Features defined and features found do not match. Found: "
                + line.length + " Declared: " + numericFeaturesSize);
    double[] extraFeatures = new double[numericFeaturesSize];
    for (int i = 0; i < numericFeaturesSize; i++) {
        extraFeatures[i] = Double.valueOf(line[i]);
    }
    INDArray transformed = Nd4j.create(extraFeatures, new int[]{extraFeatures.length, 1});

    URI uri = (locations == null || locations.length < 1 ? null : locations[splitIndex]);
    RecordMetaData meta = new RecordMetaDataLine(this.lineIndex - 1, uri, LineRecordReader.class); //-1 as line number has been incremented already...
    return new org.datavec.api.records.impl.Record(
            new ArrayList<>(Collections.<Writable>singletonList(new NDArrayWritable(transformed))), meta);
}
 
Example #7
Source File: VasttextTextFileReader.java    From scava with Eclipse Public License 2.0
@Override
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    List<Record> out = new ArrayList<>();

    for (Record fileContents : super.loadFromMetaData(recordMetaDatas)) {
        INDArray transformed = vasttextTextVectorizer.transform(fileContents);

        org.datavec.api.records.impl.Record transformedRecord = new org.datavec.api.records.impl.Record(
                new ArrayList<>(Collections.<Writable>singletonList(new NDArrayWritable(transformed))),
                new RecordMetaDataURI(fileContents.getMetaData().getURI(), VasttextTextFileReader.class));
        if (labelled)
            transformedRecord.getRecord().add(fileContents.getRecord().get(fileContents.getRecord().size() - 1));
        out.add(transformedRecord);
    }

    return out;
}
 
Example #8
Source File: SVMLightRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testNextRecord() throws IOException, InterruptedException {
    SVMLightRecordReader rr = new SVMLightRecordReader();
    Configuration config = new Configuration();
    config.setBoolean(SVMLightRecordReader.ZERO_BASED_INDEXING, false);
    config.setInt(SVMLightRecordReader.NUM_FEATURES, 10);
    config.setBoolean(SVMLightRecordReader.APPEND_LABEL, false);
    rr.initialize(config, new FileSplit(new ClassPathResource("datavec-api/svmlight/basic.txt").getFile()));

    Record record = rr.nextRecord();
    List<Writable> recordList = record.getRecord();
    assertEquals(new DoubleWritable(1.0), recordList.get(1));
    assertEquals(new DoubleWritable(3.0), recordList.get(5));
    assertEquals(new DoubleWritable(4.0), recordList.get(7));

    record = rr.nextRecord();
    recordList = record.getRecord();
    assertEquals(new DoubleWritable(0.1), recordList.get(0));
    assertEquals(new DoubleWritable(6.6), recordList.get(5));
    assertEquals(new DoubleWritable(80.0), recordList.get(7));
}
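
Outside a test, the same reader can be wired up directly; a sketch using the configuration keys from the test above (the file path is hypothetical):

SVMLightRecordReader reader = new SVMLightRecordReader();
Configuration conf = new Configuration();
conf.setBoolean(SVMLightRecordReader.ZERO_BASED_INDEXING, false); // indices in the file start at 1
conf.setInt(SVMLightRecordReader.NUM_FEATURES, 10);               // width of the dense output
conf.setBoolean(SVMLightRecordReader.APPEND_LABEL, false);        // drop the label column
reader.initialize(conf, new FileSplit(new File("/path/to/data.svmlight"))); // hypothetical

while (reader.hasNext()) {
    Record r = reader.nextRecord();
    System.out.println(r.getRecord()); // 10 DoubleWritables per line
}
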
 
Example #9
Source File: JDBCRecordReader.java    From deeplearning4j with Apache License 2.0
/**
 * @see #loadFromMetaData(RecordMetaData)
 */
@Override
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    List<Record> ret = new ArrayList<>();

    for (RecordMetaData rmd : recordMetaDatas) {
        if (!(rmd instanceof RecordMetaDataJdbc)) {
            throw new IllegalArgumentException(
                "Invalid metadata; expected RecordMetaDataJdbc instance; got: " + rmd);
        }
        QueryRunner runner = new QueryRunner();
        String request = ((RecordMetaDataJdbc) rmd).getRequest();

        try {
            Object[] item = runner
                .query(this.conn, request, new ArrayHandler(), ((RecordMetaDataJdbc) rmd).getParams().toArray());
            ret.add(new org.datavec.api.records.impl.Record(toWritable(item), rmd));
        } catch (SQLException e) {
            throw new IllegalArgumentException("Could not execute statement \"" + request + "\"", e);
        }
    }
    return ret;
}
 
Example #10
Source File: TransformProcessRecordReader.java    From deeplearning4j with Apache License 2.0
/**
 * Whether there are any more records
 *
 * @return true if another record is available, false otherwise
 */
@Override
public boolean hasNext() {
    if(next != null){
        return true;
    }
    if(!recordReader.hasNext()){
        return false;
    }

    //Prefetch, until we find one that isn't filtered out - or we run out of data
    while(next == null && recordReader.hasNext()){
        Record r = recordReader.nextRecord();
        List<Writable> temp = transformProcess.execute(r.getRecord());
        if(temp == null){
            continue;
        }
        next = new org.datavec.api.records.impl.Record(temp, r.getMetaData());
    }

    return next != null;
}
 
Example #11
Source File: VasttextTextVectorizer.java    From scava with Eclipse Public License 2.0
@Override
public INDArray transform(Record record) {
    List<String> tokens = tokensFromRecord(record.getRecord().get(0));
    List<Integer> featuresWeights = null;
    if (record.getRecord().size() > 2) {
        featuresWeights = featuresWeightsFromRecord(record.getRecord().get(1));
    }
    return createVector(new Object[] {tokens, featuresWeights});
}
 
Example #12
Source File: ArrowConverterTest.java    From DataVec with Apache License 2.0
@Test
public void testRecordReaderMetaData() throws Exception {
    val recordsToWrite = recordToWrite();
    //send file
    File tmp =  tmpDataFile(recordsToWrite);
    RecordReader recordReader = new ArrowRecordReader();
    RecordMetaDataIndex recordMetaDataIndex = new RecordMetaDataIndex(0,tmp.toURI(),ArrowRecordReader.class);
    recordReader.loadFromMetaData(recordMetaDataIndex);

    Record record = recordReader.nextRecord();
    assertEquals(2,record.getRecord().size());
}
 
Example #13
Source File: JDBCRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testNextRecordAndRecover() throws Exception {
    try (JDBCRecordReader reader = getInitializedReader("SELECT * FROM Coffee")) {
        Record r = reader.nextRecord();
        List<Writable> fields = r.getRecord();
        RecordMetaData meta = r.getMetaData();
        Record recovered = reader.loadFromMetaData(meta);
        List<Writable> fieldsRecovered = recovered.getRecord();
        assertEquals(fields.size(), fieldsRecovered.size());
        for (int i = 0; i < fields.size(); i++) {
            assertEquals(fields.get(i), fieldsRecovered.get(i));
        }
    }
}
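
The save-metadata-then-recover pattern above is not JDBC-specific; any reader implementing loadFromMetaData supports it. A sketch with CSVRecordReader and a hypothetical path:

RecordReader reader = new CSVRecordReader();
reader.initialize(new FileSplit(new File("/path/to/data.csv"))); // hypothetical

List<RecordMetaData> metas = new ArrayList<>();
while (reader.hasNext()) {
    metas.add(reader.nextRecord().getMetaData()); // keep only the lightweight handles
}

// Later: reload exactly those records without re-reading everything else
List<Record> reloaded = reader.loadFromMetaData(metas);
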
 
Example #14
Source File: MultiLabelMetrics.java    From konduit-serving with Apache License 2.0
private void incrementClassificationCounters(Record[] records) {
    if(classCounterIncrement != null) {
        NDArrayWritable ndArrayWritable = (NDArrayWritable) records[0].getRecord().get(0);
        INDArray output = ndArrayWritable.get();
        INDArray argMax = Nd4j.argMax(output, -1);
        for (int i = 0; i < argMax.length(); i++) {
            CurrentClassTrackerCount classTrackerCount = classTrackerCounts.get(argMax.getInt(i));
            classTrackerCount.increment(1.0);
        }
    }
}
 
Example #15
Source File: FileRecordReader.java    From DataVec with Apache License 2.0
@Override
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    List<Record> out = new ArrayList<>();

    for (RecordMetaData meta : recordMetaDatas) {
        URI uri = meta.getURI();

        File f = new File(uri);
        List<Writable> list = loadFromFile(f);
        out.add(new org.datavec.api.records.impl.Record(list, meta));
    }

    return out;
}
 
Example #16
Source File: RegexLineRecordReader.java    From DataVec with Apache License 2.0
@Override
public Record nextRecord() {
    List<Writable> next = next();
    URI uri = (locations == null || locations.length < 1 ? null : locations[splitIndex]);
    RecordMetaData meta = new RecordMetaDataLine(this.lineIndex - 1, uri, RegexLineRecordReader.class); //-1 as line number has been incremented already...
    return new org.datavec.api.records.impl.Record(next, meta);
}
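
A usage sketch for this reader: the constructor takes a regex whose capture groups become the columns of each record. The pattern and path below are illustrative assumptions:

String regex = "(\\d{4}-\\d{2}-\\d{2}) (\\S+) (.*)"; // one capture group per column
RecordReader reader = new RegexLineRecordReader(regex, 0); // 0 = skip no header lines
reader.initialize(new FileSplit(new File("/path/to/log.txt"))); // hypothetical

while (reader.hasNext()) {
    Record r = reader.nextRecord();
    System.out.println(r.getMetaData() + " -> " + r.getRecord()); // three Text writables per line
}
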
 
Example #17
Source File: Pipeline.java    From konduit-serving with Apache License 2.0
/**
 * Executes a pipeline on a set of input {@link Record}
 *
 * @param inputs the array of records (one "row" per input).
 * @return the output set of records
 */
public Record[] doPipeline(Record[] inputs) {
    for (PipelineStepRunner pipelineStepRunner : steps)
        inputs = pipelineStepRunner.transform(inputs);

    return inputs;
}
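
To make the fold concrete, here is a toy illustration with a hypothetical stand-in for PipelineStepRunner (the real konduit-serving interface has more methods than shown here):

// Hypothetical stand-in, reduced to the one method doPipeline uses
interface Step {
    Record[] transform(Record[] records);
}

Step upperCase = records -> new Record[] {
        new org.datavec.api.records.impl.Record(
                Collections.<Writable>singletonList(
                        new Text(records[0].getRecord().get(0).toString().toUpperCase())),
                records[0].getMetaData())
};

Record[] inputs = { new org.datavec.api.records.impl.Record(
        Collections.<Writable>singletonList(new Text("hello")), null) };

// The same fold as doPipeline: each step consumes the previous step's output
for (Step step : Collections.singletonList(upperCase)) {
    inputs = step.transform(inputs);
}
// inputs[0] now wraps "HELLO"
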
 
Example #18
Source File: PipelineExecutioner.java    From konduit-serving with Apache License 2.0
/**
 * Creates input for use in the {@link PipelineExecutioner}
 * @param input the input object
 * @param transformProcess the {@link TransformProcess} to use
 * @param conversionSchema The {@link Schema} to use
 * @return the DataVec type input records.
 */
public static Record[] createInput(Object input, TransformProcess transformProcess, Schema conversionSchema) {
    Preconditions.checkNotNull(input, "Input data was null!");

    if(input instanceof String) {
        String inputJson = (String) input;
        if (inputJson.charAt(0) == '{') {
            //json object
            log.info("Auto converting json object to json array");
            inputJson = "[" + input + "]";
        }

        JsonArray jsonArray = new JsonArray(inputJson);
        ArrowWritableRecordBatch convert;
        try {
            convert = mapConverter.convert(conversionSchema, jsonArray, transformProcess);
        } catch (Exception e) {
            log.error("Error performing conversion", e);
            throw e;
        }

        Preconditions.checkNotNull(convert, "Conversion was null!");
        Record[] pipelineInput = new Record[convert.size()];
        for (int i = 0; i < pipelineInput.length; i++) {
            pipelineInput[i] = new ArrowRecord(convert, i, null);
        }

        return pipelineInput;
    } else {
        //ndarrays already
        return (Record[]) input;
    }
}
 
Example #19
Source File: TransformProcessRecordReader.java    From DataVec with Apache License 2.0
/**
 * Similar to {@link #next()}, but returns a {@link Record} object, that may include metadata such as the source
 * of the data
 *
 * @return next record
 */
@Override
public Record nextRecord() {
    if(!hasNext()){ //Also triggers prefetch
        throw new NoSuchElementException("No next element");
    }
    Record toRet = next;
    next = null;
    return toRet;
}
 
Example #20
Source File: TestImageRecordReader.java    From DataVec with Apache License 2.0
@Test
public void testMetaData() throws IOException {

    ClassPathResource cpr = new ClassPathResource("/testimages/class0/0.jpg");
    File parentDir = cpr.getFile().getParentFile().getParentFile();
    //        System.out.println(f.getAbsolutePath());
    //        System.out.println(f.getParentFile().getParentFile().getAbsolutePath());
    ParentPathLabelGenerator labelMaker = new ParentPathLabelGenerator();
    ImageRecordReader rr = new ImageRecordReader(32, 32, 3, labelMaker);
    rr.initialize(new FileSplit(parentDir));

    List<List<Writable>> out = new ArrayList<>();
    while (rr.hasNext()) {
        List<Writable> l = rr.next();
        out.add(l);
        assertEquals(2, l.size());
    }

    assertEquals(6, out.size());

    rr.reset();
    List<List<Writable>> out2 = new ArrayList<>();
    List<Record> out3 = new ArrayList<>();
    List<RecordMetaData> meta = new ArrayList<>();

    while (rr.hasNext()) {
        Record r = rr.nextRecord();
        out2.add(r.getRecord());
        out3.add(r);
        meta.add(r.getMetaData());
        //            System.out.println(r.getMetaData() + "\t" + r.getRecord().get(1));
    }

    assertEquals(out, out2);

    List<Record> fromMeta = rr.loadFromMetaData(meta);
    assertEquals(out3, fromMeta);
}
 
Example #21
Source File: SchemaTypeUtils.java    From konduit-serving with Apache License 2.0
/**
 * Convert a batch of {@link INDArray} to {@link Record}
 * input, each record comprising a single {@link NDArrayWritable}
 *
 * @param input the input arrays
 * @return the equivalent output records
 */
public static Record[] toRecords(INDArray[] input) {
    Record[] ret = new Record[input.length];
    for (int i = 0; i < ret.length; i++) {
        ret[i] = new org.datavec.api.records.impl.Record(
                Arrays.asList(new NDArrayWritable(input[i]))
                , null);
    }

    return ret;
}
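
A usage sketch for this helper, with illustrative shapes: wrap a batch of arrays, then unwrap the NDArrayWritable on the other side.

INDArray first = Nd4j.ones(1, 4);
INDArray second = Nd4j.zeros(1, 4);

Record[] records = SchemaTypeUtils.toRecords(new INDArray[] {first, second});

// Each record carries exactly one NDArrayWritable and no metadata
NDArrayWritable writable = (NDArrayWritable) records[0].getRecord().get(0);
INDArray roundTripped = writable.get();
System.out.println(roundTripped.equals(first)); // true
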
 
Example #22
Source File: TextVectorizer.java    From deeplearning4j with Apache License 2.0
@Override
public void fit(RecordReader reader, RecordCallBack callBack) {
    while (reader.hasNext()) {
        Record record = reader.nextRecord();
        String s = toString(record.getRecord());
        Tokenizer tokenizer = tokenizerFactory.create(s);
        doWithTokens(tokenizer);
        if (callBack != null)
            callBack.onRecord(record);
        cache.incrementNumDocs(1);
    }
}
 
Example #23
Source File: ArrowRecordReader.java    From deeplearning4j with Apache License 2.0
@Override
public Record loadFromMetaData(RecordMetaData recordMetaData) {
    if(!(recordMetaData instanceof RecordMetaDataIndex)) {
        throw new IllegalArgumentException("Unable to load from meta data. No index specified for record");
    }

    RecordMetaDataIndex index = (RecordMetaDataIndex) recordMetaData;
    InputSplit fileSplit = new FileSplit(new File(index.getURI()));
    initialize(fileSplit);
    this.currIdx = (int) index.getIndex();
    return nextRecord();
}
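
Combined with Example #12, this gives random access into an Arrow file by record index; a sketch with a hypothetical path and index:

ArrowRecordReader reader = new ArrowRecordReader();

// Jump straight to record #42 of a previously written Arrow file
RecordMetaData meta = new RecordMetaDataIndex(42, new File("/path/to/data.arrow").toURI(),
        ArrowRecordReader.class);
Record r = reader.loadFromMetaData(meta);
System.out.println(r.getRecord());
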
 
Example #24
Source File: VasttextMemoryRecordReader.java    From scava with Eclipse Public License 2.0
@Override
public Record nextRecord() {
    List<Writable> next = next();
    //listIndex was already incremented inside next(), so subtract one here
    RecordMetaData meta = new RecordMetaDataIndex(listIndex - 1, null, VasttextMemoryRecordReader.class);
    return new org.datavec.api.records.impl.Record(next, meta);
}
 
Example #25
Source File: VasttextDataIterator.java    From scava with Eclipse Public License 2.0
@Override
public MultiDataSet next(int num) {
    if (!hasNext())
        throw new NoSuchElementException("No next elements");

    // First: load the next values from the RR / SeqRRs
    Map<String, List<List<Writable>>> nextRRVals = new HashMap<>();
    List<RecordMetaDataComposableMap> nextMetas =
            (collectMetaData ? new ArrayList<RecordMetaDataComposableMap>() : null);

    for (Map.Entry<String, RecordReader> entry : recordReaders.entrySet()) {
        RecordReader rr = entry.getValue();
        // Standard case. Min op: in case the user sets a batch size >> amount of data
        List<List<Writable>> writables = new ArrayList<>(Math.min(num, 100000));
        for (int i = 0; i < num && rr.hasNext(); i++) {
            List<Writable> record;
            if (collectMetaData) {
                Record r = rr.nextRecord();
                record = r.getRecord();
                if (nextMetas.size() <= i) {
                    nextMetas.add(new RecordMetaDataComposableMap(new HashMap<String, RecordMetaData>()));
                }
                RecordMetaDataComposableMap map = nextMetas.get(i);
                map.getMeta().put(entry.getKey(), r.getMetaData());
            } else {
                record = rr.next();
            }
            writables.add(record);
        }

        nextRRVals.put(entry.getKey(), writables);
    }

    return nextMultiDataSet(nextRRVals, nextMetas);
}
 
Example #26
Source File: CSVRecordReader.java    From deeplearning4j with Apache License 2.0
@Override
public Record nextRecord() {
    List<Writable> next = next();
    URI uri = (locations == null || locations.length < 1 ? null : locations[splitIndex]);
    RecordMetaData meta = new RecordMetaDataLine(this.lineIndex - 1, uri, CSVRecordReader.class); //-1 as line number has been incremented already...
    return new org.datavec.api.records.impl.Record(next, meta);
}
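
A sketch that ties the metadata produced above back to its source line (path hypothetical):

CSVRecordReader reader = new CSVRecordReader();
reader.initialize(new FileSplit(new File("/path/to/data.csv"))); // hypothetical

while (reader.hasNext()) {
    Record r = reader.nextRecord();
    RecordMetaDataLine meta = (RecordMetaDataLine) r.getMetaData();
    // Line number and source URI travel with every record
    System.out.println("line " + meta.getLineNumber() + " of " + meta.getURI() + ": " + r.getRecord());
}
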
 
Example #27
Source File: JDBCRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testNextRecord() throws Exception {
    try (JDBCRecordReader reader = getInitializedReader("SELECT * FROM Coffee")) {
        Record r = reader.nextRecord();
        List<Writable> fields = r.getRecord();
        RecordMetaData meta = r.getMetaData();
        assertNotNull(r);
        assertNotNull(fields);
        assertNotNull(meta);
        assertEquals(new Text("Bolivian Dark"), fields.get(0));
        assertEquals(new Text("14-001"), fields.get(1));
        assertEquals(new DoubleWritable(8.95), fields.get(2));
        assertEquals(RecordMetaDataJdbc.class, meta.getClass());
    }
}
 
Example #28
Source File: PmmlInferenceExecutionerStepRunner.java    From konduit-serving with Apache License 2.0
@Override
public Record[] transform(Record[] input) {
    Schema schema = pipelineStep.inputSchemaForName("default");
    List<Map<FieldName, Object>> pmmlInput = new ArrayList<>(input.length);
    List<FieldName> fieldNames = new ArrayList<>();
    for (int i = 0; i < schema.numColumns(); i++) {
        fieldNames.add(FieldName.create(schema.getName(i)));
    }

    for (Record record : input) {
        Map<FieldName, Object> pmmlRecord = new LinkedHashMap<>();
        for (int i = 0; i < record.getRecord().size(); i++) {
            pmmlRecord.put(fieldNames.get(i), WritableValueRetriever.getUnderlyingValue(record.getRecord().get(i)));
        }

        pmmlInput.add(pmmlRecord);
    }

    List<Map<FieldName, Object>> execute = pmmlInferenceExecutioner.execute(pmmlInput);
    Record[] ret = new Record[1];
    String json = ObjectMappers.toJson(execute);

    ret[0] = new org.datavec.api.records.impl.Record(Collections.singletonList(new Text(json)), null);

    return ret;
}
 
Example #29
Source File: FeatureRecordReader.java    From FancyBing with GNU General Public License v3.0
@Override
public Record nextRecord() {
    List<Writable> next = next();
    URI uri = (locations == null || locations.length < 1 ? null : locations[splitIndex]);
    RecordMetaData meta = new RecordMetaDataLine(this.lineIndex - 1, uri, LineRecordReader.class); //-1 as line number has been incremented already...
    return new org.datavec.api.records.impl.Record(next, meta);
}
 
Example #30
Source File: FileRecordReader.java    From DataVec with Apache License 2.0
@Override
public Record nextRecord() {
    if (iter == null || !iter.hasNext()) {
        this.advanceToNextLocation();
    }
    File next = iter.next();
    this.currentFile = next;
    invokeListeners(next);
    List<Writable> ret = loadFromFile(next);

    return new org.datavec.api.records.impl.Record(ret,
            new RecordMetaDataURI(next.toURI(), FileRecordReader.class));
}
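
Finally, a sketch of this reader in use: FileRecordReader emits one record per file, and the URI-based metadata records which file each record came from. The directory is hypothetical:

FileRecordReader reader = new FileRecordReader();
reader.initialize(new FileSplit(new File("/path/to/textFiles/"))); // hypothetical directory

while (reader.hasNext()) {
    Record r = reader.nextRecord();
    // One record per file; the metadata points back to the source file
    System.out.println(r.getMetaData().getURI() + " -> " + r.getRecord().size() + " writables");
}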