Java Code Examples for org.datavec.api.transform.schema.Schema#getName()

The following examples show how to use org.datavec.api.transform.schema.Schema#getName(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: DataFrames.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Spark {@link StructType} that mirrors a DataVec {@link Schema}.
 * <p>
 * One {@link StructField} is produced per schema column, in schema order, using the
 * column's name, {@code nullable = false} and empty metadata. Only {@code Double},
 * {@code Integer}, {@code Long} and {@code Float} columns are supported.
 *
 * @param schema the DataVec schema to convert
 * @return the corresponding Spark struct type
 * @throws IllegalStateException if a column has any type other than the four listed above
 */
public static StructType fromSchema(Schema schema) {
    final StructField[] fields = new StructField[schema.numColumns()];
    for (int col = 0; col < fields.length; col++) {
        // Every arm either assigns the field or throws, so it is definitely assigned below.
        final StructField field;
        switch (schema.getColumnTypes().get(col)) {
            case Double:
                field = new StructField(schema.getName(col), DataTypes.DoubleType, false, Metadata.empty());
                break;
            case Integer:
                field = new StructField(schema.getName(col), DataTypes.IntegerType, false, Metadata.empty());
                break;
            case Long:
                field = new StructField(schema.getName(col), DataTypes.LongType, false, Metadata.empty());
                break;
            case Float:
                field = new StructField(schema.getName(col), DataTypes.FloatType, false, Metadata.empty());
                break;
            default:
                throw new IllegalStateException(
                        "This api should not be used with strings , binary data or ndarrays. This is only for columnar data");
        }
        fields[col] = field;
    }
    return new StructType(fields);
}
 
Example 2
Source File: DataFrames.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a DataVec {@link Schema} into the matching Spark {@link StructType}.
 * <p>
 * Each schema column becomes one {@link StructField} carrying the column name,
 * {@code nullable = false} and empty metadata; field order follows column order.
 * Supported column types are {@code Double}, {@code Integer}, {@code Long} and
 * {@code Float}.
 *
 * @param schema the DataVec schema to convert
 * @return the corresponding Spark struct type
 * @throws IllegalStateException if a column is of any other type
 */
public static StructType fromSchema(Schema schema) {
    final StructField[] sparkFields = new StructField[schema.numColumns()];
    for (int column = 0; column < sparkFields.length; column++) {
        // Assigned in every supported arm; the default arm throws instead.
        final StructField sparkField;
        switch (schema.getColumnTypes().get(column)) {
            case Double:
                sparkField = new StructField(schema.getName(column), DataTypes.DoubleType, false, Metadata.empty());
                break;
            case Integer:
                sparkField = new StructField(schema.getName(column), DataTypes.IntegerType, false, Metadata.empty());
                break;
            case Long:
                sparkField = new StructField(schema.getName(column), DataTypes.LongType, false, Metadata.empty());
                break;
            case Float:
                sparkField = new StructField(schema.getName(column), DataTypes.FloatType, false, Metadata.empty());
                break;
            default:
                throw new IllegalStateException(
                        "This api should not be used with strings , binary data or ndarrays. This is only for columnar data");
        }
        sparkFields[column] = sparkField;
    }
    return new StructType(sparkFields);
}
 
Example 3
Source File: ArrowUtils.java    From konduit-serving with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the vectors of a {@link VectorSchemaRoot} in an {@link ArrowWritableRecordBatch}.
 * <p>
 * For every column of {@code schema}, in column order, the vector registered under the
 * column's name is fetched from {@code vectorLoader}. The collected vectors and the
 * schema are used to build the batch, and {@code arrowRecordBatch} is attached to it.
 *
 * @param arrowRecordBatch the Arrow record batch to attach to the result
 * @param schema           the schema whose column names select the vectors
 * @param vectorLoader     the root from which vectors are looked up by name
 * @return the batch built from the selected vectors and the schema
 */
public static ArrowWritableRecordBatch asDataVecBatch(ArrowRecordBatch arrowRecordBatch, Schema schema, VectorSchemaRoot vectorLoader) {
    // Diamond form keeps the list type-checked as List<FieldVector>; a raw ArrayList
    // here would only compile with an unchecked-conversion warning.
    List<FieldVector> fieldVectors = new ArrayList<>();

    for (int j = 0; j < schema.numColumns(); ++j) {
        String name = schema.getName(j);
        FieldVector fieldVector = vectorLoader.getVector(name);
        fieldVectors.add(fieldVector);
    }

    ArrowWritableRecordBatch ret = new ArrowWritableRecordBatch(fieldVectors, schema);
    ret.setArrowRecordBatch(arrowRecordBatch);
    return ret;
}
 
Example 4
Source File: ArrowConverter.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a record batch into a two-dimensional {@link INDArray}, one array column
 * per schema column.
 * <p>
 * All columns in the batch must have the same number of rows, because an
 * {@link INDArray} requires uniform dimensions; the row count is taken from the first
 * column. Every column must be numerical ({@code Integer}, {@code Float},
 * {@code Double} or {@code Long}). If the data is not numerical yet, consider running
 * the output of an {@link org.datavec.arrow.recordreader.ArrowRecordReader} through an
 * {@link org.datavec.api.transform.TransformProcess} first.
 * <p>
 * A column is written into the result only when the result's buffer data type is
 * {@code FLOAT} or {@code DOUBLE}; for any other buffer type the column is left as
 * created.
 *
 * @param arrowWritableRecordBatch the incoming batch. This is typically output from
 *                                 an {@link org.datavec.arrow.recordreader.ArrowRecordReader}
 * @return an {@link INDArray} holding the batch data, shaped rows x columns
 * @throws ND4JIllegalArgumentException if any column is not one of the numerical types above
 */
public static INDArray toArray(ArrowWritableRecordBatch arrowWritableRecordBatch) {
    List<FieldVector> vectors = arrowWritableRecordBatch.getList();
    Schema batchSchema = arrowWritableRecordBatch.getSchema();

    // Reject the batch up front if any column is not numerical.
    for (int col = 0; col < batchSchema.numColumns(); col++) {
        switch (batchSchema.getType(col)) {
            case Integer:
            case Float:
            case Double:
            case Long:
                break;
            default:
                throw new ND4JIllegalArgumentException("Illegal data type found for column " + batchSchema.getName(col));
        }
    }

    int rows = arrowWritableRecordBatch.getList().get(0).getValueCount();
    int cols = batchSchema.numColumns();
    INDArray result = Nd4j.create(rows, cols);

    for (int col = 0; col < cols; col++) {
        INDArray converted = ArrowConverter.convertArrowVector(vectors.get(col), batchSchema.getType(col));
        // Copy the converted values using the same precision as the result's buffer.
        switch (result.data().dataType()) {
            case FLOAT: {
                INDArray asFloats = Nd4j.create(converted.data().asFloat());
                result.putColumn(col, asFloats.reshape(rows, 1));
                break;
            }
            case DOUBLE: {
                INDArray asDoubles = Nd4j.create(converted.data().asDouble());
                result.putColumn(col, asDoubles.reshape(rows, 1));
                break;
            }
        }
    }

    return result;
}
 
Example 5
Source File: ArrowConverter.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an {@link ArrowWritableRecordBatch} from the vectors held by a
 * {@link VectorSchemaRoot}.
 * <p>
 * Vectors are looked up by schema column name and kept in schema column order; the
 * given record batch is then attached to the result.
 *
 * @param arrowRecordBatch the Arrow record batch to attach to the result
 * @param schema           the schema whose column names select the vectors
 * @param vectorLoader     the root from which vectors are looked up by name
 * @return the batch built from the selected vectors and the schema
 */
private static ArrowWritableRecordBatch asDataVecBatch(ArrowRecordBatch arrowRecordBatch, Schema schema, VectorSchemaRoot vectorLoader) {
    List<FieldVector> vectorsInSchemaOrder = new ArrayList<>();

    // Walk the schema column by column, picking up the vector with the matching name.
    int col = 0;
    while (col < schema.numColumns()) {
        vectorsInSchemaOrder.add(vectorLoader.getVector(schema.getName(col)));
        col++;
    }

    ArrowWritableRecordBatch batch = new ArrowWritableRecordBatch(vectorsInSchemaOrder, schema);
    batch.setArrowRecordBatch(arrowRecordBatch);
    return batch;
}
 
Example 6
Source File: DataFrames.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a DataVec sequence schema to a Spark {@link StructType}, for example for use
 * with {@link #toDataFrameSequence(Schema, JavaRDD)}.
 * <p>
 * <b>Note</b>: as with {@link #toDataFrameSequence(Schema, JavaRDD)}, two extra columns
 * are placed in front of the schema's own columns:<br>
 * - Column 0: sequence UUID (name: {@link #SEQUENCE_UUID_COLUMN}), a string UUID for the
 * original sequence<br>
 * - Column 1: sequence index (name: {@link #SEQUENCE_INDEX_COLUMN}), an integer index,
 * starting at 0, for the position of the record in the original time series<br>
 * Both columns are required if the data is later converted back into a sequence, for
 * example using {@link #toRecordsSequence(DataRowsFacade)}.
 * <p>
 * The schema's own columns follow in schema order and must be of type {@code Double},
 * {@code Integer}, {@code Long} or {@code Float}.
 *
 * @param schema Schema to convert
 * @return StructType for the schema, including the two leading sequence columns
 * @throws IllegalStateException if a schema column has any other type
 */
public static StructType fromSchemaSequence(Schema schema) {
    final StructField[] fields = new StructField[schema.numColumns() + 2];

    // The two bookkeeping columns always occupy the first two slots.
    fields[0] = new StructField(SEQUENCE_UUID_COLUMN, DataTypes.StringType, false, Metadata.empty());
    fields[1] = new StructField(SEQUENCE_INDEX_COLUMN, DataTypes.IntegerType, false, Metadata.empty());

    for (int col = 0; col < schema.numColumns(); col++) {
        // Every arm either assigns the field or throws, so it is definitely assigned below.
        final StructField field;
        switch (schema.getColumnTypes().get(col)) {
            case Double:
                field = new StructField(schema.getName(col), DataTypes.DoubleType, false, Metadata.empty());
                break;
            case Integer:
                field = new StructField(schema.getName(col), DataTypes.IntegerType, false, Metadata.empty());
                break;
            case Long:
                field = new StructField(schema.getName(col), DataTypes.LongType, false, Metadata.empty());
                break;
            case Float:
                field = new StructField(schema.getName(col), DataTypes.FloatType, false, Metadata.empty());
                break;
            default:
                throw new IllegalStateException(
                        "This api should not be used with strings , binary data or ndarrays. This is only for columnar data");
        }
        // Schema columns are shifted past the two sequence columns.
        fields[col + 2] = field;
    }
    return new StructType(fields);
}
 
Example 7
Source File: ArrowConverter.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles an {@link ArrowWritableRecordBatch} from the vectors of a
 * {@link VectorSchemaRoot}.
 * <p>
 * For each schema column, in order, the vector with the column's name is fetched from
 * {@code vectorLoader}. The resulting list and the schema form the batch, to which
 * {@code arrowRecordBatch} is attached.
 *
 * @param arrowRecordBatch the Arrow record batch to attach to the result
 * @param schema           the schema whose column names select the vectors
 * @param vectorLoader     the root from which vectors are looked up by name
 * @return the batch built from the selected vectors and the schema
 */
private static ArrowWritableRecordBatch asDataVecBatch(ArrowRecordBatch arrowRecordBatch, Schema schema, VectorSchemaRoot vectorLoader) {
    List<FieldVector> columns = new ArrayList<>();
    for (int columnIdx = 0; columnIdx < schema.numColumns(); columnIdx++) {
        // Vectors are addressed by column name rather than by position.
        columns.add(vectorLoader.getVector(schema.getName(columnIdx)));
    }

    ArrowWritableRecordBatch result = new ArrowWritableRecordBatch(columns, schema);
    result.setArrowRecordBatch(arrowRecordBatch);
    return result;
}
 
Example 8
Source File: PythonUtils.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates {@link PythonVariables} from an input {@link Schema}.
 * <p>
 * One variable is added per column, named after the column, with the type chosen as
 * follows:
 * <ul>
 *   <li>{@code Integer}, {@code Long}: {@code PythonType.INT}</li>
 *   <li>{@code Float}, {@code Double}: {@code PythonType.FLOAT}</li>
 *   <li>{@code String}, {@code Categorical}: {@code PythonType.STR}</li>
 *   <li>{@code Boolean}: {@code PythonType.BOOL}</li>
 *   <li>{@code Bytes}: {@code PythonType.BYTES}</li>
 *   <li>{@code NDArray}: {@code PythonType.NDARRAY}</li>
 * </ul>
 * A column whose type matches none of the cases handled here adds no variable.
 *
 * @param input the input schema
 * @return the output python variables
 * @throws UnsupportedOperationException if the schema contains a {@code Time} column
 */
public static PythonVariables fromSchema(Schema input) {
    PythonVariables variables = new PythonVariables();
    for (int col = 0; col < input.numColumns(); col++) {
        String name = input.getName(col);
        switch (input.getType(col)) {
            case Integer:
            case Long:
                variables.add(name, PythonType.INT);
                break;
            case Float:
            case Double:
                variables.add(name, PythonType.FLOAT);
                break;
            case String:
            case Categorical:
                variables.add(name, PythonType.STR);
                break;
            case Boolean:
                variables.add(name, PythonType.BOOL);
                break;
            case Bytes:
                variables.add(name, PythonType.BYTES);
                break;
            case NDArray:
                variables.add(name, PythonType.NDARRAY);
                break;
            case Time:
                throw new UnsupportedOperationException("Unable to process dates with python yet.");
        }
    }

    return variables;
}
 
Example 9
Source File: PythonUtils.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Convert a {@link Schema} to {@link PythonVariables}.
 * <p>
 * One variable is registered per column, named after the column. {@code Long} and
 * {@code Integer} columns become int variables, {@code Double} and {@code Float}
 * become float variables, {@code String} becomes a str variable, {@code NDArray} an
 * ndarray variable and {@code Boolean} a bool variable.
 *
 * @param schema the input schema
 * @return the output {@link PythonVariables}, with one entry per schema column
 * @throws IllegalArgumentException if a column has a type with no mapping listed above
 * @throws Exception retained in the signature so existing callers compile unchanged
 */
public static PythonVariables schemaToPythonVariables(Schema schema) throws Exception {
    PythonVariables pyVars = new PythonVariables();
    int numCols = schema.numColumns();
    for (int i = 0; i < numCols; i++) {
        String colName = schema.getName(i);
        ColumnType colType = schema.getType(i);
        switch (colType) {
            case Long:
            case Integer:
                pyVars.addInt(colName);
                break;
            case Double:
            case Float:
                pyVars.addFloat(colName);
                break;
            case String:
                pyVars.addStr(colName);
                break;
            case NDArray:
                pyVars.addNDArray(colName);
                break;
            case Boolean:
                pyVars.addBool(colName);
                break;
            default:
                // A specific unchecked type instead of a raw Exception; callers that
                // catch Exception still catch this, and the message is unchanged.
                throw new IllegalArgumentException("Unsupported python input type: " + colType);
        }
    }

    return pyVars;
}
 
Example 10
Source File: DataFrames.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a DataVec sequence schema to a Spark {@link StructType}, for example for use
 * with {@link #toDataFrameSequence(Schema, JavaRDD)}.
 * <p>
 * <b>Note</b>: as with {@link #toDataFrameSequence(Schema, JavaRDD)}, two extra columns
 * are placed in front of the schema's own columns:<br>
 * - Column 0: sequence UUID (name: {@link #SEQUENCE_UUID_COLUMN}), a string UUID for the
 * original sequence<br>
 * - Column 1: sequence index (name: {@link #SEQUENCE_INDEX_COLUMN}), an integer index,
 * starting at 0, for the position of the record in the original time series<br>
 * Both columns are required if the data is later converted back into a sequence, for
 * example using {@code toRecordsSequence(Dataset<Row>)}.
 * <p>
 * The schema's own columns follow in schema order and must be of type {@code Double},
 * {@code Integer}, {@code Long} or {@code Float}.
 *
 * @param schema Schema to convert
 * @return StructType for the schema, including the two leading sequence columns
 * @throws IllegalStateException if a schema column has any other type
 */
public static StructType fromSchemaSequence(Schema schema) {
    final StructField[] sparkFields = new StructField[schema.numColumns() + 2];

    // Slots 0 and 1 are reserved for the sequence bookkeeping columns.
    sparkFields[0] = new StructField(SEQUENCE_UUID_COLUMN, DataTypes.StringType, false, Metadata.empty());
    sparkFields[1] = new StructField(SEQUENCE_INDEX_COLUMN, DataTypes.IntegerType, false, Metadata.empty());

    for (int column = 0; column < schema.numColumns(); column++) {
        // Assigned in every supported arm; the default arm throws instead.
        final StructField sparkField;
        switch (schema.getColumnTypes().get(column)) {
            case Double:
                sparkField = new StructField(schema.getName(column), DataTypes.DoubleType, false, Metadata.empty());
                break;
            case Integer:
                sparkField = new StructField(schema.getName(column), DataTypes.IntegerType, false, Metadata.empty());
                break;
            case Long:
                sparkField = new StructField(schema.getName(column), DataTypes.LongType, false, Metadata.empty());
                break;
            case Float:
                sparkField = new StructField(schema.getName(column), DataTypes.FloatType, false, Metadata.empty());
                break;
            default:
                throw new IllegalStateException(
                        "This api should not be used with strings , binary data or ndarrays. This is only for columnar data");
        }
        // Schema columns start after the two sequence columns.
        sparkFields[column + 2] = sparkField;
    }
    return new StructType(sparkFields);
}