Java Code Examples for org.datavec.api.transform.schema.Schema#getType()

The following examples show how to use org.datavec.api.transform.schema.Schema#getType(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: ArrowConverter.java From DataVec with Apache License 2.0

6 votes

/**
 * Builds one field vector per schema column, in column order.
 * String and Categorical columns both use {@code stringVectorOf}.
 *
 * @param bufferAllocator allocator handed to every vector factory call
 * @param schema          schema supplying the column types and names
 * @param numRows         row count handed to every vector factory call
 * @return the created vectors, one per column
 * @throws IllegalArgumentException if a column type has no matching vector factory
 */
private static List<FieldVector> createFieldVectors(BufferAllocator bufferAllocator,Schema schema, int numRows) {
    final int columnCount = schema.numColumns();
    final List<FieldVector> vectors = new ArrayList<>(columnCount);

    for (int col = 0; col < columnCount; col++) {
        final FieldVector vector;
        switch (schema.getType(col)) {
            case Integer:
                vector = intVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            case Long:
                vector = longVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            case Double:
                vector = doubleVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            case Float:
                vector = floatVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            case Boolean:
                vector = booleanVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            case String:
            case Categorical:
                vector = stringVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            case Time:
                vector = timeVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            default:
                throw new IllegalArgumentException("Illegal type found " + schema.getType(col));
        }
        vectors.add(vector);
    }

    return vectors;
}

Example 2

Source File: LocalTransformExecutor.java From DataVec with Apache License 2.0

6 votes

/**
 * Convert a list of string records to
 * the proper writable set based on the schema.
 * Note that this does not use arrow.
 * This just uses normal writable objects.
 * <p>
 * The value at position {@code k} of each record is parsed according to
 * {@code schema.getType(k)}. Double, Float, Integer, Long, String and Time
 * columns are converted (Time values are parsed as longs); values in columns
 * of any other type are skipped.
 *
 * @param stringInput the string input, one inner list per record
 * @param schema the schema to use
 * @return the converted records, in the same order as the input
 * @throws NumberFormatException if a value cannot be parsed as its column's numeric type
 */
public static List<List<Writable>> convertStringInput(List<List<String>> stringInput,Schema schema) {
    List<List<Writable>> ret = new ArrayList<>(stringInput.size());
    for (List<String> record : stringInput) {
        List<Writable> recordAdd = new ArrayList<>(record.size());
        for (int k = 0; k < record.size(); k++) {
            String value = record.get(k);
            switch (schema.getType(k)) {
                case Double:
                    recordAdd.add(new DoubleWritable(Double.parseDouble(value)));
                    break;
                case Float:
                    recordAdd.add(new FloatWritable(Float.parseFloat(value)));
                    break;
                case Integer:
                    recordAdd.add(new IntWritable(Integer.parseInt(value)));
                    break;
                case Long:
                case Time:
                    recordAdd.add(new LongWritable(Long.parseLong(value)));
                    break;
                case String:
                    recordAdd.add(new Text(value));
                    break;
                default:
                    // No string conversion for this column type; the value is skipped.
                    break;
            }
        }

        // Collect into the list that is returned. Previously the records went into a
        // separate list that was discarded, so the method always returned an empty result.
        ret.add(recordAdd);
    }

    return ret;
}

Example 3

Source File: NumericalColumnComparator.java From DataVec with Apache License 2.0

6 votes

/**
 * Stores the schema via the superclass, records the type of the compared column,
 * and rejects column types this comparator does not accept.
 *
 * @param sequenceSchema schema from which the type of column {@code columnIdx} is read
 * @throws IllegalStateException if the column type is not Integer, Long, Double or Time
 */
@Override
public void setSchema(Schema sequenceSchema) {
    super.setSchema(sequenceSchema);
    columnType = sequenceSchema.getType(columnIdx);
    switch (columnType) {
        case Time: // Time column uses LongWritables too, so it is accepted
        case Double:
        case Long:
        case Integer:
            return;
        default:
            // Every other type (including Categorical, Bytes and String) is rejected
            throw new IllegalStateException(
                            "Cannot apply numerical column comparator on column of type " + columnType);
    }
}

Example 4

Source File: DataFrames.java From DataVec with Apache License 2.0

6 votes

/**
 * Turns a Row into a list of writables, picking the writable class for each
 * position from the schema's column type at that index.
 *
 * @param schema Schema describing the column types
 * @param row    Row whose values are read by index
 * @return one writable per row position, in order
 * @throws IllegalStateException if a column type is not Double, Float, Integer, Long or String
 */
public static List<Writable> rowToWritables(Schema schema, Row row) {
    final List<Writable> writables = new ArrayList<>();
    int col = 0;
    while (col < row.size()) {
        final Writable converted;
        switch (schema.getType(col)) {
            case Double:
                converted = new DoubleWritable(row.getDouble(col));
                break;
            case Float:
                converted = new FloatWritable(row.getFloat(col));
                break;
            case Integer:
                converted = new IntWritable(row.getInt(col));
                break;
            case Long:
                converted = new LongWritable(row.getLong(col));
                break;
            case String:
                converted = new Text(row.getString(col));
                break;
            default:
                throw new IllegalStateException("Illegal type");
        }
        writables.add(converted);
        col++;
    }
    return writables;
}

Example 5

Source File: ArrowConverter.java From deeplearning4j with Apache License 2.0

6 votes

/**
 * Builds one field vector per schema column, in column order.
 * String and Categorical columns both use {@code stringVectorOf}.
 *
 * @param bufferAllocator allocator handed to every vector factory call
 * @param schema          schema supplying the column types and names
 * @param numRows         row count handed to every vector factory call
 * @return the created vectors, one per column
 * @throws IllegalArgumentException if a column type has no matching vector factory
 */
private static List<FieldVector> createFieldVectors(BufferAllocator bufferAllocator,Schema schema, int numRows) {
    final int columnCount = schema.numColumns();
    final List<FieldVector> vectors = new ArrayList<>(columnCount);

    for (int col = 0; col < columnCount; col++) {
        final FieldVector vector;
        switch (schema.getType(col)) {
            case Integer:
                vector = intVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            case Long:
                vector = longVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            case Double:
                vector = doubleVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            case Float:
                vector = floatVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            case Boolean:
                vector = booleanVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            case String:
            case Categorical:
                vector = stringVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            case Time:
                vector = timeVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            case NDArray:
                vector = ndarrayVectorOf(bufferAllocator, schema.getName(col), numRows);
                break;
            default:
                throw new IllegalArgumentException("Illegal type found for creation of field vectors" + schema.getType(col));
        }
        vectors.add(vector);
    }

    return vectors;
}

Example 6

Source File: LocalTransformExecutor.java From deeplearning4j with Apache License 2.0

6 votes

/**
 * Convert a list of string records to
 * the proper writable set based on the schema.
 * Note that this does not use arrow.
 * This just uses normal writable objects.
 * <p>
 * The value at position {@code k} of each record is parsed according to
 * {@code schema.getType(k)}. Double, Float, Integer, Long, String and Time
 * columns are converted (Time values are parsed as longs); values in columns
 * of any other type are skipped.
 *
 * @param stringInput the string input, one inner list per record
 * @param schema the schema to use
 * @return the converted records, in the same order as the input
 * @throws NumberFormatException if a value cannot be parsed as its column's numeric type
 */
public static List<List<Writable>> convertStringInput(List<List<String>> stringInput,Schema schema) {
    List<List<Writable>> ret = new ArrayList<>(stringInput.size());
    for (List<String> record : stringInput) {
        List<Writable> recordAdd = new ArrayList<>(record.size());
        for (int k = 0; k < record.size(); k++) {
            String value = record.get(k);
            switch (schema.getType(k)) {
                case Double:
                    recordAdd.add(new DoubleWritable(Double.parseDouble(value)));
                    break;
                case Float:
                    recordAdd.add(new FloatWritable(Float.parseFloat(value)));
                    break;
                case Integer:
                    recordAdd.add(new IntWritable(Integer.parseInt(value)));
                    break;
                case Long:
                case Time:
                    recordAdd.add(new LongWritable(Long.parseLong(value)));
                    break;
                case String:
                    recordAdd.add(new Text(value));
                    break;
                default:
                    // No string conversion for this column type; the value is skipped.
                    break;
            }
        }

        // Collect into the list that is returned. Previously the records went into a
        // separate list that was discarded, so the method always returned an empty result.
        ret.add(recordAdd);
    }

    return ret;
}

Example 7

Source File: NumericalColumnComparator.java From deeplearning4j with Apache License 2.0

6 votes

/**
 * Stores the schema via the superclass, records the type of the compared column,
 * and rejects column types this comparator does not accept.
 *
 * @param sequenceSchema schema from which the type of column {@code columnIdx} is read
 * @throws IllegalStateException if the column type is not Integer, Long, Double or Time
 */
@Override
public void setSchema(Schema sequenceSchema) {
    super.setSchema(sequenceSchema);
    columnType = sequenceSchema.getType(columnIdx);
    switch (columnType) {
        case Time: // Time column uses LongWritables too, so it is accepted
        case Double:
        case Long:
        case Integer:
            return;
        default:
            // Every other type (including Categorical, Bytes and String) is rejected
            throw new IllegalStateException(
                            "Cannot apply numerical column comparator on column of type " + columnType);
    }
}

Example 8

Source File: DataFrames.java From deeplearning4j with Apache License 2.0

6 votes

/**
 * Turns a Row into a list of writables, picking the writable class for each
 * position from the schema's column type at that index.
 *
 * @param schema Schema describing the column types
 * @param row    Row whose values are read by index
 * @return one writable per row position, in order
 * @throws IllegalStateException if a column type is not Double, Float, Integer, Long or String
 */
public static List<Writable> rowToWritables(Schema schema, Row row) {
    final List<Writable> writables = new ArrayList<>();
    int col = 0;
    while (col < row.size()) {
        final Writable converted;
        switch (schema.getType(col)) {
            case Double:
                converted = new DoubleWritable(row.getDouble(col));
                break;
            case Float:
                converted = new FloatWritable(row.getFloat(col));
                break;
            case Integer:
                converted = new IntWritable(row.getInt(col));
                break;
            case Long:
                converted = new LongWritable(row.getLong(col));
                break;
            case String:
                converted = new Text(row.getString(col));
                break;
            default:
                throw new IllegalStateException("Illegal type");
        }
        writables.add(converted);
        col++;
    }
    return writables;
}

Example 9

Source File: ArrowUtils.java From konduit-serving with Apache License 2.0

5 votes

/**
 * Creates one field vector per schema column, in column order.
 * String and Categorical columns both use {@code stringVectorOf}.
 *
 * @param bufferAllocator allocator handed to every vector factory call
 * @param schema          schema supplying the column types and names
 * @param numRows         row count handed to every vector factory call
 * @return the created vectors, one per column
 * @throws IllegalArgumentException if a column type (for example Bytes) has no
 *                                  matching vector factory
 */
public static List<FieldVector> createFieldVectors(BufferAllocator bufferAllocator, Schema schema, int numRows) {
    // Diamond operator instead of the raw ArrayList type, which triggered an unchecked conversion.
    List<FieldVector> ret = new ArrayList<>(schema.numColumns());

    for (int i = 0; i < schema.numColumns(); ++i) {
        switch (schema.getType(i)) {
            case Integer:
                ret.add(intVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case Float:
                ret.add(floatVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case Double:
                ret.add(doubleVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case Long:
                ret.add(longVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case NDArray:
                ret.add(ndarrayVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case Boolean:
                ret.add(booleanVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case String:
            case Categorical:
                ret.add(stringVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case Time:
                ret.add(timeVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case Bytes:
            default:
                // Kept last so no case depends on its position relative to the throwing branch.
                throw new IllegalArgumentException("Illegal type found for creation of field vectors" + schema.getType(i));
        }
    }

    return ret;
}

Example 10

Source File: ArrowConverter.java From DataVec with Apache License 2.0

5 votes

/**
 * Converts a record batch into a single {@link INDArray} with one row per
 * record and one column per schema column.
 * <p>
 * Every column must be of type Integer, Float, Double or Long; non-numerical data
 * should first be transformed, for example with an
 * {@link org.datavec.api.transform.TransformProcess}. The row count is taken from the
 * value count of the first vector in the batch, and the result is created with that
 * many rows for all columns, so all columns are expected to have the same number of rows.
 * <p>
 * Columns are copied into the result only when the result's backing data type is
 * FLOAT or DOUBLE; for any other data type the created array is returned as is.
 *
 * @param arrowWritableRecordBatch the incoming batch. This is typically output from
 *                                 an {@link org.datavec.arrow.recordreader.ArrowRecordReader}
 * @return an {@link INDArray} representative of the input data
 * @throws ND4JIllegalArgumentException if any column is not Integer, Float, Double or Long
 */
public static INDArray toArray(ArrowWritableRecordBatch arrowWritableRecordBatch) {
    final List<FieldVector> vectors = arrowWritableRecordBatch.getList();
    final Schema schema = arrowWritableRecordBatch.getSchema();

    // Check every column type before allocating anything.
    for (int col = 0; col < schema.numColumns(); col++) {
        switch (schema.getType(col)) {
            case Integer:
            case Float:
            case Double:
            case Long:
                break;
            default:
                throw new ND4JIllegalArgumentException("Illegal data type found for column " + schema.getName(col));
        }
    }

    final int rowCount = arrowWritableRecordBatch.getList().get(0).getValueCount();
    final int columnCount = schema.numColumns();
    final INDArray result = Nd4j.create(rowCount, columnCount);

    for (int col = 0; col < columnCount; col++) {
        final INDArray converted = ArrowConverter.convertArrowVector(vectors.get(col), schema.getType(col));
        switch (result.data().dataType()) {
            case FLOAT:
                result.putColumn(col, Nd4j.create(converted.data().asFloat()).reshape(rowCount, 1));
                break;
            case DOUBLE:
                result.putColumn(col, Nd4j.create(converted.data().asDouble()).reshape(rowCount, 1));
                break;
            default:
                // Other backing data types: the column is left untouched.
                break;
        }
    }

    return result;
}

Example 11

Source File: LocalTransformExecutor.java From DataVec with Apache License 2.0

5 votes

/**
 * Converts string time series into writable time series according to the schema.
 * Arrow is not involved; plain writable objects are produced.
 * <p>
 * The input nests as sequences, then time steps, then values. The value at position
 * {@code k} of a time step is parsed according to {@code schema.getType(k)}:
 * Double, Float, Integer, Long, String and Time are handled (Time values are parsed
 * as longs); values in columns of any other type are skipped.
 *
 * @param stringInput the string input
 * @param schema the schema to use
 * @return the converted records, nested the same way as the input
 */
public static List<List<List<Writable>>> convertStringInputTimeSeries(List<List<List<String>>> stringInput,Schema schema) {
    final List<List<List<Writable>>> sequences = new ArrayList<>();
    for (List<List<String>> sequence : stringInput) {
        final List<List<Writable>> steps = new ArrayList<>();
        for (List<String> step : sequence) {
            final List<Writable> values = new ArrayList<>();
            for (int col = 0; col < step.size(); col++) {
                final String raw = step.get(col);
                switch (schema.getType(col)) {
                    case Double:
                        values.add(new DoubleWritable(Double.parseDouble(raw)));
                        break;
                    case Float:
                        values.add(new FloatWritable(Float.parseFloat(raw)));
                        break;
                    case Integer:
                        values.add(new IntWritable(Integer.parseInt(raw)));
                        break;
                    case Long:
                    case Time:
                        values.add(new LongWritable(Long.parseLong(raw)));
                        break;
                    case String:
                        values.add(new Text(raw));
                        break;
                    default:
                        // Column types without a conversion contribute nothing.
                        break;
                }
            }
            steps.add(values);
        }
        sequences.add(steps);
    }
    return sequences;
}

Example 12

Source File: ParseDoubleTransform.java From DataVec with Apache License 2.0

5 votes

/**
 * Derives the output schema for this transformation from an input schema:
 * every String column becomes a Double column with the same name, and all
 * other columns keep their existing metadata.
 *
 * @param inputSchema schema of the data before the transformation
 * @return the schema of the data after the transformation
 */
@Override
public Schema transform(Schema inputSchema) {
    final Schema.Builder builder = new Schema.Builder();
    final int columnCount = inputSchema.numColumns();
    for (int col = 0; col < columnCount; col++) {
        if (inputSchema.getType(col) != ColumnType.String) {
            // Non-string columns are carried over unchanged.
            builder.addColumn(inputSchema.getMetaData(col));
            continue;
        }
        builder.addColumnDouble(inputSchema.getMetaData(col).getName());
    }
    return builder.build();
}

Example 13

Source File: LocalTransformExecutor.java From deeplearning4j with Apache License 2.0

5 votes

/**
 * Converts string time series into writable time series according to the schema.
 * Arrow is not involved; plain writable objects are produced.
 * <p>
 * The input nests as sequences, then time steps, then values. The value at position
 * {@code k} of a time step is parsed according to {@code schema.getType(k)}:
 * Double, Float, Integer, Long, String and Time are handled (Time values are parsed
 * as longs); values in columns of any other type are skipped.
 *
 * @param stringInput the string input
 * @param schema the schema to use
 * @return the converted records, nested the same way as the input
 */
public static List<List<List<Writable>>> convertStringInputTimeSeries(List<List<List<String>>> stringInput,Schema schema) {
    final List<List<List<Writable>>> sequences = new ArrayList<>();
    for (List<List<String>> sequence : stringInput) {
        final List<List<Writable>> steps = new ArrayList<>();
        for (List<String> step : sequence) {
            final List<Writable> values = new ArrayList<>();
            for (int col = 0; col < step.size(); col++) {
                final String raw = step.get(col);
                switch (schema.getType(col)) {
                    case Double:
                        values.add(new DoubleWritable(Double.parseDouble(raw)));
                        break;
                    case Float:
                        values.add(new FloatWritable(Float.parseFloat(raw)));
                        break;
                    case Integer:
                        values.add(new IntWritable(Integer.parseInt(raw)));
                        break;
                    case Long:
                    case Time:
                        values.add(new LongWritable(Long.parseLong(raw)));
                        break;
                    case String:
                        values.add(new Text(raw));
                        break;
                    default:
                        // Column types without a conversion contribute nothing.
                        break;
                }
            }
            steps.add(values);
        }
        sequences.add(steps);
    }
    return sequences;
}

Example 14

Source File: ParseDoubleTransform.java From deeplearning4j with Apache License 2.0

5 votes

/**
 * Derives the output schema for this transformation from an input schema:
 * every String column becomes a Double column with the same name, and all
 * other columns keep their existing metadata.
 *
 * @param inputSchema schema of the data before the transformation
 * @return the schema of the data after the transformation
 */
@Override
public Schema transform(Schema inputSchema) {
    final Schema.Builder builder = new Schema.Builder();
    final int columnCount = inputSchema.numColumns();
    for (int col = 0; col < columnCount; col++) {
        if (inputSchema.getType(col) != ColumnType.String) {
            // Non-string columns are carried over unchanged.
            builder.addColumn(inputSchema.getMetaData(col));
            continue;
        }
        builder.addColumnDouble(inputSchema.getMetaData(col).getName());
    }
    return builder.build();
}

Example 15

Source File: PythonUtils.java From deeplearning4j with Apache License 2.0

5 votes

/**
 * Create {@link PythonVariables} from an input {@link Schema}.
 * One variable is added per column, named after the column, with the type mapped as:
 * NDArray to NDARRAY, Boolean to BOOL, Categorical and String to STR,
 * Double and Float to FLOAT, Integer and Long to INT, Bytes to BYTES.
 * A column whose type matches none of the handled cases adds no variable.
 *
 * @param input the input schema
 * @return the output python variables.
 * @throws UnsupportedOperationException if the schema contains a Time column
 */
public static PythonVariables fromSchema(Schema input) {
    final PythonVariables variables = new PythonVariables();
    for (int col = 0; col < input.numColumns(); col++) {
        final String name = input.getName(col);
        final ColumnType type = input.getType(col);
        final PythonType pythonType;
        switch (type) {
            case NDArray:
                pythonType = PythonType.NDARRAY;
                break;
            case Boolean:
                pythonType = PythonType.BOOL;
                break;
            case String:
            case Categorical:
                pythonType = PythonType.STR;
                break;
            case Float:
            case Double:
                pythonType = PythonType.FLOAT;
                break;
            case Long:
            case Integer:
                pythonType = PythonType.INT;
                break;
            case Bytes:
                pythonType = PythonType.BYTES;
                break;
            case Time:
                throw new UnsupportedOperationException("Unable to process dates with python yet.");
            default:
                // No mapping for this column type: nothing is added for it.
                continue;
        }
        variables.add(name, pythonType);
    }

    return variables;
}

Example 16

Source File: PythonUtils.java From deeplearning4j with Apache License 2.0

5 votes

/**
 * Builds {@link PythonVariables} from a {@link Schema}, declaring one variable
 * per column under the column's name.
 * Long and Integer columns are declared as ints, Double and Float as floats,
 * String as str, NDArray as ndarray and Boolean as bool.
 *
 * @param schema the input schema
 * @return the output {@link PythonVariables}, with one entry per schema column
 * @throws Exception if a column has any other type
 */
public static PythonVariables schemaToPythonVariables(Schema schema) throws Exception {
    final PythonVariables variables = new PythonVariables();
    final int columnCount = schema.numColumns();
    for (int col = 0; col < columnCount; col++) {
        final String name = schema.getName(col);
        final ColumnType type = schema.getType(col);
        switch (type) {
            case Integer:
            case Long:
                variables.addInt(name);
                break;
            case Float:
            case Double:
                variables.addFloat(name);
                break;
            case String:
                variables.addStr(name);
                break;
            case NDArray:
                variables.addNDArray(name);
                break;
            case Boolean:
                variables.addBool(name);
                break;
            default:
                throw new Exception("Unsupported python input type: " + type);
        }
    }

    return variables;
}